diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000..2b65f6fe3cc80 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.bat text eol=crlf +*.cmd text eol=crlf diff --git a/.gitignore b/.gitignore index 34939e3a97aaa..9757054a50f9e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,18 +5,22 @@ *.ipr *.iml *.iws +*.pyc .idea/ .idea_modules/ -sbt/*.jar +build/*.jar .settings .cache +cache .generated-mima* -/build/ work/ out/ .DS_Store third_party/libmesos.so third_party/libmesos.dylib +build/apache-maven* +build/zinc* +build/scala* conf/java-opts conf/*.sh conf/*.cmd @@ -49,9 +53,12 @@ dependency-reduced-pom.xml checkpoint derby.log dist/ -spark-*-bin.tar.gz +dev/create-release/*txt +dev/create-release/*final +spark-*-bin-*.tgz unit-tests.log /lib/ +ec2/lib/ rat-results.txt scalastyle.txt scalastyle-output.xml diff --git a/.rat-excludes b/.rat-excludes index b14ad53720f32..769defbac11b7 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -1,5 +1,6 @@ target .gitignore +.gitattributes .project .classpath .mima-excludes @@ -43,11 +44,13 @@ SparkImports.scala SparkJLineCompletion.scala SparkJLineReader.scala SparkMemberHandlers.scala +SparkReplReporter.scala sbt sbt-launch-lib.bash plugins.sbt work .*\.q +.*\.qv golden test.out/* .*iml @@ -61,3 +64,4 @@ dist/* logs .*scalastyle-output.xml .*dependency-reduced-pom.xml +known_translations diff --git a/LICENSE b/LICENSE index a7eee041129cb..0a42d389e4c3c 100644 --- a/LICENSE +++ b/LICENSE @@ -646,7 +646,8 @@ THE SOFTWARE. ======================================================================== For Scala Interpreter classes (all .scala files in repl/src/main/scala -except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala): +except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), +and for SerializableMapWrapper in JavaUtils.scala: ======================================================================== Copyright (c) 2002-2013 EPFL @@ -712,18 +713,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -======================================================================== -For colt: -======================================================================== - -Copyright (c) 1999 CERN - European Organization for Nuclear Research. -Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. CERN makes no representations about the suitability of this software for any purpose. It is provided "as is" without expressed or implied warranty. - -Packages hep.aida.* - -Written by Pavel Binko, Dino Ferrero Merlino, Wolfgang Hoschek, Tony Johnson, Andreas Pfeiffer, and others. Check the FreeHEP home page for more info. Permission to use and/or redistribute this work is granted under the terms of the LGPL License, with the exception that any usage related to military applications is expressly forbidden. The software and documentation made available under the terms of this license are provided with no warranty. - - ======================================================================== For SnapTree: ======================================================================== @@ -766,7 +755,7 @@ SUCH DAMAGE. 
======================================================================== -For Timsort (core/src/main/java/org/apache/spark/util/collection/Sorter.java): +For Timsort (core/src/main/java/org/apache/spark/util/collection/TimSort.java): ======================================================================== Copyright (C) 2008 The Android Open Source Project @@ -783,6 +772,25 @@ See the License for the specific language governing permissions and limitations under the License. +======================================================================== +For LimitedInputStream + (network/common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java): +======================================================================== +Copyright (C) 2007 The Guava Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + ======================================================================== BSD-style licenses ======================================================================== diff --git a/README.md b/README.md index dbf53dcd76b2d..af02339578195 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ and Spark Streaming for stream processing. ## Online Documentation You can find the latest Spark documentation, including a programming -guide, on the [project web page](http://spark.apache.org/documentation.html). +guide, on the [project web page](http://spark.apache.org/documentation.html) +and [project wiki](https://cwiki.apache.org/confluence/display/SPARK). This README file only contains basic setup instructions. ## Building Spark @@ -25,7 +26,7 @@ To build Spark and its example programs, run: (You do not need to do this if you downloaded a pre-built package.) More detailed documentation is available from the project site, at -["Building Spark with Maven"](http://spark.apache.org/docs/latest/building-with-maven.html). +["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html). ## Interactive Scala Shell @@ -84,7 +85,7 @@ storage systems. Because the protocols have changed in different versions of Hadoop, you must build Spark against the same version that your cluster runs. Please refer to the build documentation at -["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-with-maven.html#specifying-the-hadoop-version) +["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version) for detailed guidance on building for a particular distribution of Hadoop, including building for particular Hive and Hive Thriftserver distributions.
See also ["Third Party Hadoop Distributions"](http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html) diff --git a/assembly/pom.xml b/assembly/pom.xml index 31a01e4d8e1de..1bb5a671f5390 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent - 1.2.0-SNAPSHOT + 1.3.0-SNAPSHOT ../pom.xml @@ -43,12 +43,6 @@ - - - com.google.guava - guava - compile - org.apache.spark spark-core_${scala.binary.version} @@ -66,22 +60,22 @@ org.apache.spark - spark-repl_${scala.binary.version} + spark-streaming_${scala.binary.version} ${project.version} org.apache.spark - spark-streaming_${scala.binary.version} + spark-graphx_${scala.binary.version} ${project.version} org.apache.spark - spark-graphx_${scala.binary.version} + spark-sql_${scala.binary.version} ${project.version} org.apache.spark - spark-sql_${scala.binary.version} + spark-repl_${scala.binary.version} ${project.version} @@ -133,20 +127,6 @@ shade - - - com.google - org.spark-project.guava - - com.google.common.** - - - com/google/common/base/Absent* - com/google/common/base/Optional* - com/google/common/base/Present* - - - @@ -169,16 +149,6 @@ - - yarn-alpha - - - org.apache.spark - spark-yarn-alpha_${scala.binary.version} - ${project.version} - - - yarn @@ -197,6 +167,11 @@ spark-hive_${scala.binary.version} ${project.version} + + + + hive-thriftserver + org.apache.spark spark-hive-thriftserver_${scala.binary.version} @@ -359,5 +334,25 @@ + + + + hadoop-provided + + provided + + + + hive-provided + + provided + + + + parquet-provided + + provided + + diff --git a/bagel/pom.xml b/bagel/pom.xml index 93db0d5efda5f..510e92640eff8 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent - 1.2.0-SNAPSHOT + 1.3.0-SNAPSHOT ../pom.xml @@ -40,15 +40,6 @@ spark-core_${scala.binary.version} ${project.version} - - org.eclipse.jetty - jetty-server - - - org.scalatest - scalatest_${scala.binary.version} - test - org.scalacheck scalacheck_${scala.binary.version} @@ -58,11 +49,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.scalatest - scalatest-maven-plugin - - diff --git a/bagel/src/test/resources/log4j.properties b/bagel/src/test/resources/log4j.properties index 789869f72e3b0..853ef0ed2986f 100644 --- a/bagel/src/test/resources/log4j.properties +++ b/bagel/src/test/resources/log4j.properties @@ -15,10 +15,10 @@ # limitations under the License. # -# Set everything to be logged to the file bagel/target/unit-tests.log +# Set everything to be logged to the file target/unit-tests.log log4j.rootCategory=INFO, file log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file.append=false +log4j.appender.file.append=true log4j.appender.file.file=target/unit-tests.log log4j.appender.file.layout=org.apache.log4j.PatternLayout log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n diff --git a/bin/beeline.cmd b/bin/beeline.cmd new file mode 100644 index 0000000000000..8293f311029dd --- /dev/null +++ b/bin/beeline.cmd @@ -0,0 +1,21 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. 
You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +set SPARK_HOME=%~dp0.. +cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.hive.beeline.BeeLine %* diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index 3cd0579aea8d3..088f993954d9e 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -1,117 +1,124 @@ -@echo off - -rem -rem Licensed to the Apache Software Foundation (ASF) under one or more -rem contributor license agreements. See the NOTICE file distributed with -rem this work for additional information regarding copyright ownership. -rem The ASF licenses this file to You under the Apache License, Version 2.0 -rem (the "License"); you may not use this file except in compliance with -rem the License. You may obtain a copy of the License at -rem -rem http://www.apache.org/licenses/LICENSE-2.0 -rem -rem Unless required by applicable law or agreed to in writing, software -rem distributed under the License is distributed on an "AS IS" BASIS, -rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -rem See the License for the specific language governing permissions and -rem limitations under the License. -rem - -rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run" -rem script and the ExecutorRunner in standalone cluster mode. - -rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting -rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we -rem need to set it here because we use !datanucleus_jars! below. -if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion -setlocal enabledelayedexpansion -:skip_delayed_expansion - -set SCALA_VERSION=2.10 - -rem Figure out where the Spark framework is installed -set FWDIR=%~dp0..\ - -rem Load environment variables from conf\spark-env.cmd, if it exists -if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd" - -rem Build up classpath -set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH% - -if not "x%SPARK_CONF_DIR%"=="x" ( - set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR% -) else ( - set CLASSPATH=%CLASSPATH%;%FWDIR%conf -) - -if exist "%FWDIR%RELEASE" ( - for %%d in ("%FWDIR%lib\spark-assembly*.jar") do ( - set ASSEMBLY_JAR=%%d - ) -) else ( - for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do ( - set ASSEMBLY_JAR=%%d - ) -) - -set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR% - -rem When Hive support is needed, Datanucleus jars must be included on the classpath. -rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. -rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is -rem built with Hive, so look for them there. 
-if exist "%FWDIR%RELEASE" ( - set datanucleus_dir=%FWDIR%lib -) else ( - set datanucleus_dir=%FWDIR%lib_managed\jars -) -set "datanucleus_jars=" -for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do ( - set datanucleus_jars=!datanucleus_jars!;%%d -) -set CLASSPATH=%CLASSPATH%;%datanucleus_jars% - -set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes -set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes - -set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes -set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes - -if "x%SPARK_TESTING%"=="x1" ( - rem Add test clases to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH - rem so that local compilation takes precedence over assembled jar - set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH% -) - -rem Add hadoop conf dir - else FileSystem.*, etc fail -rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts -rem the configurtion files. -if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir - set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR% -:no_hadoop_conf_dir - -if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir - set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR% -:no_yarn_conf_dir - -rem A bit of a hack to allow calling this script within run2.cmd without seeing output -if "%DONT_PRINT_CLASSPATH%"=="1" goto exit - -echo %CLASSPATH% - -:exit +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. 
You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run" +rem script and the ExecutorRunner in standalone cluster mode. + +rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting +rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we +rem need to set it here because we use !datanucleus_jars! below. +if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion +setlocal enabledelayedexpansion +:skip_delayed_expansion + +set SCALA_VERSION=2.10 + +rem Figure out where the Spark framework is installed +set FWDIR=%~dp0..\ + +rem Load environment variables from conf\spark-env.cmd, if it exists +if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd" + +rem Build up classpath +set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH% + +if not "x%SPARK_CONF_DIR%"=="x" ( + set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR% +) else ( + set CLASSPATH=%CLASSPATH%;%FWDIR%conf +) + +if exist "%FWDIR%RELEASE" ( + for %%d in ("%FWDIR%lib\spark-assembly*.jar") do ( + set ASSEMBLY_JAR=%%d + ) +) else ( + for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do ( + set ASSEMBLY_JAR=%%d + ) +) + +set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR% + +rem When Hive support is needed, Datanucleus jars must be included on the classpath. +rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. +rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is +rem built with Hive, so look for them there. 
+if exist "%FWDIR%RELEASE" ( + set datanucleus_dir=%FWDIR%lib +) else ( + set datanucleus_dir=%FWDIR%lib_managed\jars +) +set "datanucleus_jars=" +for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do ( + set datanucleus_jars=!datanucleus_jars!;%%d +) +set CLASSPATH=%CLASSPATH%;%datanucleus_jars% + +set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes +set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes + +set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes +set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes + +if "x%SPARK_TESTING%"=="x1" ( + rem Add test clases to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH + rem so that local compilation takes precedence over assembled jar + set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH% +) + +rem Add hadoop conf dir - else FileSystem.*, etc fail +rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts +rem the configurtion files. +if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir + set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR% +:no_hadoop_conf_dir + +if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir + set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR% +:no_yarn_conf_dir + +rem To allow for distributions to append needed libraries to the classpath (e.g. when +rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and +rem append it to tbe final classpath. +if not "x%$SPARK_DIST_CLASSPATH%"=="x" ( + set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH% +) + +rem A bit of a hack to allow calling this script within run2.cmd without seeing output +if "%DONT_PRINT_CLASSPATH%"=="1" goto exit + +echo %CLASSPATH% + +:exit diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 905bbaf99b374..a8c344b1ca594 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -20,14 +20,16 @@ # This script computes Spark's classpath and prints it to stdout; it's used by both the "run" # script and the ExecutorRunner in standalone cluster mode. 
-SCALA_VERSION=2.10 - # Figure out where Spark is installed FWDIR="$(cd "`dirname "$0"`"/..; pwd)" . "$FWDIR"/bin/load-spark-env.sh -CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH" +if [ -n "$SPARK_CLASSPATH" ]; then + CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH" +else + CLASSPATH="$SPARK_SUBMIT_CLASSPATH" +fi # Build up classpath if [ -n "$SPARK_CONF_DIR" ]; then @@ -36,7 +38,7 @@ else CLASSPATH="$CLASSPATH:$FWDIR/conf" fi -ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION" +ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SPARK_SCALA_VERSION" if [ -n "$JAVA_HOME" ]; then JAR_CMD="$JAVA_HOME/bin/jar" @@ -48,19 +50,21 @@ fi if [ -n "$SPARK_PREPEND_CLASSES" ]; then echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\ "classes ahead of assembly." >&2 - CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes" + # Spark classes + CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SPARK_SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SPARK_SCALA_VERSION/classes" + # Jars for shaded deps in their original form (copied here during build) CLASSPATH="$CLASSPATH:$FWDIR/core/target/jars/*" - CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SCALA_VERSION/classes" - CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes" fi # Use spark-assembly jar from either RELEASE or assembly directory @@ -70,22 +74,25 @@ else assembly_folder="$ASSEMBLY_DIR" fi -num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)" -if [ "$num_jars" -eq "0" ]; then - echo "Failed to find Spark assembly in $assembly_folder" - echo "You need to build Spark before running this program." - exit 1 -fi +num_jars=0 + +for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do + if [[ ! -e "$f" ]]; then + echo "Failed to find Spark assembly in $assembly_folder" 1>&2 + echo "You need to build Spark before running this program." 
1>&2 + exit 1 + fi + ASSEMBLY_JAR="$f" + num_jars=$((num_jars+1)) +done + if [ "$num_jars" -gt "1" ]; then - jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar") - echo "Found multiple Spark assembly jars in $assembly_folder:" - echo "$jars_list" - echo "Please remove all but one jar." + echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2 + ls ${assembly_folder}/spark-assembly*hadoop*.jar 1>&2 + echo "Please remove all but one jar." 1>&2 exit 1 fi -ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)" - # Verify that versions of java used to build the jars and run Spark are compatible jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1) if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then @@ -110,7 +117,7 @@ else datanucleus_dir="$FWDIR"/lib_managed/jars fi -datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")" +datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar$")" datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)" if [ -n "$datanucleus_jars" ]; then @@ -123,15 +130,15 @@ fi # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1 if [[ $SPARK_TESTING == 1 ]]; then - CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/test-classes" fi # Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail ! @@ -144,4 +151,11 @@ if [ -n "$YARN_CONF_DIR" ]; then CLASSPATH="$CLASSPATH:$YARN_CONF_DIR" fi +# To allow for distributions to append needed libraries to the classpath (e.g. when +# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and +# append it to tbe final classpath. 
+if [ -n "$SPARK_DIST_CLASSPATH" ]; then + CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH" +fi + echo "$CLASSPATH" diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh index 6d4231b204595..356b3d49b2ffe 100644 --- a/bin/load-spark-env.sh +++ b/bin/load-spark-env.sh @@ -36,3 +36,23 @@ if [ -z "$SPARK_ENV_LOADED" ]; then set +a fi fi + +# Setting SPARK_SCALA_VERSION if not already set. + +if [ -z "$SPARK_SCALA_VERSION" ]; then + + ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11" + ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10" + + if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then + echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2 + echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2 + exit 1 + fi + + if [ -d "$ASSEMBLY_DIR2" ]; then + export SPARK_SCALA_VERSION="2.11" + else + export SPARK_SCALA_VERSION="2.10" + fi +fi diff --git a/bin/pyspark b/bin/pyspark index 96f30a260a09e..0b4f695dd06dd 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -25,7 +25,7 @@ export SPARK_HOME="$FWDIR" source "$FWDIR/bin/utils.sh" -SCALA_VERSION=2.10 +source "$FWDIR"/bin/load-spark-env.sh function usage() { echo "Usage: ./bin/pyspark [options]" 1>&2 @@ -40,7 +40,7 @@ fi # Exit if the user hasn't compiled Spark if [ ! -f "$FWDIR/RELEASE" ]; then # Exit if the user hasn't compiled Spark - ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null + ls "$FWDIR"/assembly/target/scala-$SPARK_SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null if [[ $? != 0 ]]; then echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2 echo "You need to build Spark before running this program" 1>&2 @@ -48,8 +48,6 @@ if [ ! -f "$FWDIR/RELEASE" ]; then fi fi -. "$FWDIR"/bin/load-spark-env.sh - # In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython` # executable, while the worker would still be launched using PYSPARK_PYTHON. # @@ -134,7 +132,5 @@ if [[ "$1" =~ \.py$ ]]; then gatherSparkSubmitOpts "$@" exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}" else - # PySpark shell requires special handling downstream - export PYSPARK_SHELL=1 exec "$PYSPARK_DRIVER_PYTHON" $PYSPARK_DRIVER_PYTHON_OPTS fi diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index a0e66abcc26c9..a542ec80b49d6 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -59,7 +59,11 @@ for /f %%i in ('echo %1^| findstr /R "\.py"') do ( ) if [%PYTHON_FILE%] == [] ( - %PYSPARK_PYTHON% + if [%IPYTHON%] == [1] ( + ipython %IPYTHON_OPTS% + ) else ( + %PYSPARK_PYTHON% + ) ) else ( echo. echo WARNING: Running python applications through ./bin/pyspark.cmd is deprecated as of Spark 1.0. diff --git a/bin/run-example b/bin/run-example index 34dd71c71880e..c567acf9a6b5c 100755 --- a/bin/run-example +++ b/bin/run-example @@ -17,12 +17,12 @@ # limitations under the License. # -SCALA_VERSION=2.10 - FWDIR="$(cd "`dirname "$0"`"/..; pwd)" export SPARK_HOME="$FWDIR" EXAMPLES_DIR="$FWDIR"/examples +. 
"$FWDIR"/bin/load-spark-env.sh + if [ -n "$1" ]; then EXAMPLE_CLASS="$1" shift @@ -35,17 +35,32 @@ else fi if [ -f "$FWDIR/RELEASE" ]; then - export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`" -elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then - export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`" + JAR_PATH="${FWDIR}/lib" +else + JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}" fi -if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then - echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2 - echo "You need to build Spark before running this program" 1>&2 +JAR_COUNT=0 + +for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do + if [[ ! -e "$f" ]]; then + echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2 + echo "You need to build Spark before running this program" 1>&2 + exit 1 + fi + SPARK_EXAMPLES_JAR="$f" + JAR_COUNT=$((JAR_COUNT+1)) +done + +if [ "$JAR_COUNT" -gt "1" ]; then + echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2 + ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2 + echo "Please remove all but one jar." 1>&2 exit 1 fi +export SPARK_EXAMPLES_JAR + EXAMPLE_MASTER=${MASTER:-"local[*]"} if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then diff --git a/bin/spark-class b/bin/spark-class index 91d858bc063d0..2f0441bb3c1c2 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -24,13 +24,12 @@ case "`uname`" in CYGWIN*) cygwin=true;; esac -SCALA_VERSION=2.10 - # Figure out where Spark is installed FWDIR="$(cd "`dirname "$0"`"/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" +export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"$SPARK_HOME/conf"}" . "$FWDIR"/bin/load-spark-env.sh @@ -73,6 +72,8 @@ case "$1" in 'org.apache.spark.executor.MesosExecutorBackend') OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS" OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM} + export PYTHONPATH="$FWDIR/python:$PYTHONPATH" + export PYTHONPATH="$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH" ;; # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS + @@ -81,7 +82,11 @@ case "$1" in OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then - OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + if [[ $OSTYPE == darwin* ]]; then + export DYLD_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DYLD_LIBRARY_PATH" + else + export LD_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$LD_LIBRARY_PATH" + fi fi if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY" @@ -116,17 +121,17 @@ fi JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" # Load extra JAVA_OPTS from conf/java-opts, if it exists -if [ -e "$FWDIR/conf/java-opts" ] ; then - JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`" +if [ -e "$SPARK_CONF_DIR/java-opts" ] ; then + JAVA_OPTS="$JAVA_OPTS `cat "$SPARK_CONF_DIR"/java-opts`" fi # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! 
TOOLS_DIR="$FWDIR"/tools SPARK_TOOLS_JAR="" -if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then +if [ -e "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then # Use the JAR from the SBT build - export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`" + export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar`" fi if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then # Use the JAR from the Maven build @@ -145,8 +150,8 @@ fi if [[ "$1" =~ org.apache.spark.tools.* ]]; then if test -z "$SPARK_TOOLS_JAR"; then - echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SCALA_VERSION/" 1>&2 - echo "You need to build Spark before running $1." 1>&2 + echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2 + echo "You need to run \"build/sbt tools/package\" before running $1." 1>&2 exit 1 fi CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" diff --git a/bin/spark-shell b/bin/spark-shell index 4a0670fc6c8aa..cca5aa0676123 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -45,6 +45,13 @@ source "$FWDIR"/bin/utils.sh SUBMIT_USAGE_FUNCTION=usage gatherSparkSubmitOpts "$@" +# SPARK-4161: scala does not assume use of the java classpath, +# so we need to add the "-Dscala.usejavacp=true" flag mnually. We +# do this specifically for the Spark shell because the scala REPL +# has its own class loader, and any additional classpath specified +# through spark.driver.extraClassPath is not automatically propagated. +SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true" + function main() { if $cygwin; then # Workaround for issue involving JLine and Cygwin diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd index 2ee60b4e2a2b3..1d1a40da315eb 100644 --- a/bin/spark-shell2.cmd +++ b/bin/spark-shell2.cmd @@ -19,4 +19,23 @@ rem set SPARK_HOME=%~dp0.. -cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell +echo "%*" | findstr " --help -h" >nul +if %ERRORLEVEL% equ 0 ( + call :usage + exit /b 0 +) + +call %SPARK_HOME%\bin\windows-utils.cmd %* +if %ERRORLEVEL% equ 1 ( + call :usage + exit /b 1 +) + +cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %SUBMISSION_OPTS% spark-shell %APPLICATION_OPTS% + +exit /b 0 + +:usage +echo "Usage: .\bin\spark-shell.cmd [options]" >&2 +%SPARK_HOME%\bin\spark-submit --help 2>&1 | findstr /V "Usage" 1>&2 +exit /b 0 diff --git a/bin/spark-sql b/bin/spark-sql index 63d00437d508d..3b6cc420fea81 100755 --- a/bin/spark-sql +++ b/bin/spark-sql @@ -23,6 +23,8 @@ # Enter posix mode for bash set -o posix +# NOTE: This exact class name is matched downstream by SparkSubmit. +# Any changes need to be reflected there. 
CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver" # Figure out where Spark is installed diff --git a/bin/spark-submit b/bin/spark-submit index c557311b4b20e..3e5cbdbb24394 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -22,6 +22,9 @@ export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" ORIG_ARGS=("$@") +# Set COLUMNS for progress bar +export COLUMNS=`tput cols` + while (($#)); do if [ "$1" = "--deploy-mode" ]; then SPARK_SUBMIT_DEPLOY_MODE=$2 @@ -35,11 +38,19 @@ while (($#)); do export SPARK_SUBMIT_CLASSPATH=$2 elif [ "$1" = "--driver-java-options" ]; then export SPARK_SUBMIT_OPTS=$2 + elif [ "$1" = "--master" ]; then + export MASTER=$2 fi shift done -DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" +if [ -z "$SPARK_CONF_DIR" ]; then + export SPARK_CONF_DIR="$SPARK_HOME/conf" +fi +DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf" +if [ "$MASTER" == "yarn-cluster" ]; then + SPARK_SUBMIT_DEPLOY_MODE=cluster +fi export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"} export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} diff --git a/bin/spark-submit2.cmd b/bin/spark-submit2.cmd index cf6046d1547ad..12244a9cb04fb 100644 --- a/bin/spark-submit2.cmd +++ b/bin/spark-submit2.cmd @@ -24,7 +24,11 @@ set ORIG_ARGS=%* rem Reset the values of all variables used set SPARK_SUBMIT_DEPLOY_MODE=client -set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf + +if not defined %SPARK_CONF_DIR% ( + set SPARK_CONF_DIR=%SPARK_HOME%\conf +) +set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf set SPARK_SUBMIT_DRIVER_MEMORY= set SPARK_SUBMIT_LIBRARY_PATH= set SPARK_SUBMIT_CLASSPATH= @@ -45,11 +49,17 @@ if [%1] == [] goto continue set SPARK_SUBMIT_CLASSPATH=%2 ) else if [%1] == [--driver-java-options] ( set SPARK_SUBMIT_OPTS=%2 + ) else if [%1] == [--master] ( + set MASTER=%2 ) shift goto loop :continue +if [%MASTER%] == [yarn-cluster] ( + set SPARK_SUBMIT_DEPLOY_MODE=cluster +) + rem For client mode, the driver will be launched in the same JVM that launches rem SparkSubmit, so we may need to read the properties file for any extra class rem paths, library paths, java options and memory early on. Otherwise, it will diff --git a/bin/utils.sh b/bin/utils.sh index 22ea2b9a6d586..2241200082018 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -26,14 +26,14 @@ function gatherSparkSubmitOpts() { exit 1 fi - # NOTE: If you add or remove spark-sumbmit options, + # NOTE: If you add or remove spark-submit options, # modify NOT ONLY this script but also SparkSubmitArgument.scala SUBMISSION_OPTS=() APPLICATION_OPTS=() while (($#)); do case "$1" in - --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \ - --conf | --properties-file | --driver-memory | --driver-java-options | \ + --master | --deploy-mode | --class | --name | --jars | --packages | --py-files | --files | \ + --conf | --repositories | --properties-file | --driver-memory | --driver-java-options | \ --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \ --total-executor-cores | --executor-cores | --queue | --num-executors | --archives) if [[ $# -lt 2 ]]; then diff --git a/bin/windows-utils.cmd b/bin/windows-utils.cmd new file mode 100644 index 0000000000000..567b8733f7f77 --- /dev/null +++ b/bin/windows-utils.cmd @@ -0,0 +1,59 @@ +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. 
See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +rem Gather all spark-submit options into SUBMISSION_OPTS + +set SUBMISSION_OPTS= +set APPLICATION_OPTS= + +rem NOTE: If you add or remove spark-sumbmit options, +rem modify NOT ONLY this script but also SparkSubmitArgument.scala + +:OptsLoop +if "x%1"=="x" ( + goto :OptsLoopEnd +) + +SET opts="\<--master\> \<--deploy-mode\> \<--class\> \<--name\> \<--jars\> \<--py-files\> \<--files\>" +SET opts="%opts:~1,-1% \<--conf\> \<--properties-file\> \<--driver-memory\> \<--driver-java-options\>" +SET opts="%opts:~1,-1% \<--driver-library-path\> \<--driver-class-path\> \<--executor-memory\>" +SET opts="%opts:~1,-1% \<--driver-cores\> \<--total-executor-cores\> \<--executor-cores\> \<--queue\>" +SET opts="%opts:~1,-1% \<--num-executors\> \<--archives\> \<--packages\> \<--repositories\>" + +echo %1 | findstr %opts% >nul +if %ERRORLEVEL% equ 0 ( + if "x%2"=="x" ( + echo "%1" requires an argument. >&2 + exit /b 1 + ) + set SUBMISSION_OPTS=%SUBMISSION_OPTS% %1 %2 + shift + shift + goto :OptsLoop +) +echo %1 | findstr "\<--verbose\> \<-v\> \<--supervise\>" >nul +if %ERRORLEVEL% equ 0 ( + set SUBMISSION_OPTS=%SUBMISSION_OPTS% %1 + shift + goto :OptsLoop +) +set APPLICATION_OPTS=%APPLICATION_OPTS% %1 +shift +goto :OptsLoop + +:OptsLoopEnd +exit /b 0 diff --git a/build/mvn b/build/mvn new file mode 100755 index 0000000000000..a87c5a26230c8 --- /dev/null +++ b/build/mvn @@ -0,0 +1,149 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Determine the current working directory +_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +# Preserve the calling directory +_CALLING_DIR="$(pwd)" + +# Installs any application tarball given a URL, the expected tarball name, +# and, optionally, a checkable binary path to determine if the binary has +# already been installed +## Arg1 - URL +## Arg2 - Tarball Name +## Arg3 - Checkable Binary +install_app() { + local remote_tarball="$1/$2" + local local_tarball="${_DIR}/$2" + local binary="${_DIR}/$3" + + # setup `curl` and `wget` silent options if we're running on Jenkins + local curl_opts="" + local wget_opts="" + if [ -n "$AMPLAB_JENKINS" ]; then + curl_opts="-s" + wget_opts="--quiet" + else + curl_opts="--progress-bar" + wget_opts="--progress=bar:force" + fi + + if [ -z "$3" -o ! -f "$binary" ]; then + # check if we already have the tarball + # check if we have curl installed + # download application + [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \ + echo "exec: curl ${curl_opts} ${remote_tarball}" && \ + curl ${curl_opts} "${remote_tarball}" > "${local_tarball}" + # if the file still doesn't exist, lets try `wget` and cross our fingers + [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \ + echo "exec: wget ${wget_opts} ${remote_tarball}" && \ + wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" + # if both were unsuccessful, exit + [ ! -f "${local_tarball}" ] && \ + echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ + echo "please install manually and try again." && \ + exit 2 + cd "${_DIR}" && tar -xzf "$2" + rm -rf "$local_tarball" + fi +} + +# Install maven under the build/ folder +install_mvn() { + install_app \ + "http://archive.apache.org/dist/maven/maven-3/3.2.5/binaries" \ + "apache-maven-3.2.5-bin.tar.gz" \ + "apache-maven-3.2.5/bin/mvn" + MVN_BIN="${_DIR}/apache-maven-3.2.5/bin/mvn" +} + +# Install zinc under the build/ folder +install_zinc() { + local zinc_path="zinc-0.3.5.3/bin/zinc" + [ ! -f "${zinc_path}" ] && ZINC_INSTALL_FLAG=1 + install_app \ + "http://downloads.typesafe.com/zinc/0.3.5.3" \ + "zinc-0.3.5.3.tgz" \ + "${zinc_path}" + ZINC_BIN="${_DIR}/${zinc_path}" +} + +# Determine the Scala version from the root pom.xml file, set the Scala URL, +# and, with that, download the specific version of Scala necessary under +# the build/ folder +install_scala() { + # determine the Scala version used in Spark + local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | \ + head -1 | cut -f2 -d'>' | cut -f1 -d'<'` + local scala_bin="${_DIR}/scala-${scala_version}/bin/scala" + + install_app \ + "http://downloads.typesafe.com/scala/${scala_version}" \ + "scala-${scala_version}.tgz" \ + "scala-${scala_version}/bin/scala" + + SCALA_COMPILER="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-compiler.jar" + SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar" +} + +# Determines if a given application is already installed. If not, will attempt +# to install +## Arg1 - application name +## Arg2 - Alternate path to local install under build/ dir +check_and_install_app() { + # create the local environment variable in uppercase + local app_bin="`echo $1 | awk '{print toupper(\$0)}'`_BIN" + # some black magic to set the generated app variable (i.e. 
MVN_BIN) into the + # environment + eval "${app_bin}=`which $1 2>/dev/null`" + + if [ -z "`which $1 2>/dev/null`" ]; then + install_$1 + fi +} + +# Setup healthy defaults for the Zinc port if none were provided from +# the environment +ZINC_PORT=${ZINC_PORT:-"3030"} + +# Check and install all applications necessary to build Spark +check_and_install_app "mvn" + +# Install the proper version of Scala and Zinc for the build +install_zinc +install_scala + +# Reset the current working directory +cd "${_CALLING_DIR}" + +# Now that zinc is ensured to be installed, check its status and, if its +# not running or just installed, start it +if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then + ${ZINC_BIN} -shutdown + ${ZINC_BIN} -start -port ${ZINC_PORT} \ + -scala-compiler "${SCALA_COMPILER}" \ + -scala-library "${SCALA_LIBRARY}" &>/dev/null +fi + +# Set any `mvn` options if not already present +export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"} + +# Last, call the `mvn` command as usual +${MVN_BIN} "$@" diff --git a/build/sbt b/build/sbt new file mode 100755 index 0000000000000..28ebb64f7197c --- /dev/null +++ b/build/sbt @@ -0,0 +1,128 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. +for i in "$HIVE_HOME"/lib/* +do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" +done +export HADOOP_CLASSPATH + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. 
+ -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null; then + if [ $(command -v curl) ]; then (curl --silent ${URL1} > "${JAR_DL}" || curl --silent ${URL2} > "${JAR_DL}") && mv "${JAR_DL}" "${JAR}" - elif hash wget 2>/dev/null; then + elif [ $(command -v wget) ]; then (wget --quiet ${URL1} -O "${JAR_DL}" || wget --quiet ${URL2} -O "${JAR_DL}") && mv "${JAR_DL}" "${JAR}" else printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" @@ -104,7 +104,7 @@ addResidual () { residual_args=( "${residual_args[@]}" "$1" ) } addDebugger () { - addJava "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1" + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" } # a ham-fisted attempt to move some memory settings in concert @@ -124,7 +124,8 @@ require_arg () { local opt="$2" local arg="$3" if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then - die "$opt requires <$type> argument" + echo "$opt requires <$type> argument" 1>&2 + exit 1 fi } @@ -149,7 +150,7 @@ process_args () { -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;; -D*) addJava "$1" && shift ;; - -J*) addJava "${1:2}" && shift ;; + -J*) addJava "${1:2}" && shift ;; -P*) enableProfile "$1" && shift ;; *) addResidual "$1" && shift ;; esac @@ -185,10 +186,3 @@ run() { "${sbt_commands[@]}" \ "${residual_args[@]}" } - -runAlternateBoot() { - local bootpropsfile="$1" - shift - addJava "-Dsbt.boot.properties=$bootpropsfile" - run $@ -} diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template index 30bcab0c93302..464c14457e53f 100644 --- a/conf/metrics.properties.template +++ b/conf/metrics.properties.template @@ -77,8 +77,8 @@ # sample false Whether to show entire set of samples for histograms ('false' or 'true') # # * Default path is /metrics/json for all instances except the master. 
The master has two paths: -# /metrics/aplications/json # App information -# /metrics/master/json # Master information +# /metrics/applications/json # App information +# /metrics/master/json # Master information # org.apache.spark.metrics.sink.GraphiteSink # Name: Default: Description: @@ -87,6 +87,7 @@ # period 10 Poll period # unit seconds Units of poll period # prefix EMPTY STRING Prefix to prepend to metric name +# protocol tcp Protocol ("tcp" or "udp") to use ## Examples # Enable JmxSink for all instances by class name diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index f8ffbf64278fb..0886b0276fb90 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -28,7 +28,7 @@ # - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job. # - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job. -# Options for the daemons used in the standalone deploy mode: +# Options for the daemons used in the standalone deploy mode # - SPARK_MASTER_IP, to bind the master to a different IP address or hostname # - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master # - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") @@ -41,3 +41,10 @@ # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. 
(Default: 0) diff --git a/core/pom.xml b/core/pom.xml index a5a178079bc57..2dc5f747f2b71 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent - 1.2.0-SNAPSHOT + 1.3.0-SNAPSHOT ../pom.xml @@ -34,6 +34,38 @@ Spark Project Core http://spark.apache.org/ + + com.google.guava + guava + + + com.twitter + chill_${scala.binary.version} + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + + + com.twitter + chill-java + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.apache.hadoop hadoop-client @@ -44,6 +76,16 @@ + + org.apache.spark + spark-network-common_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-network-shuffle_${scala.binary.version} + ${project.version} + net.java.dev.jets3t jets3t @@ -52,32 +94,45 @@ org.apache.curator curator-recipes + + org.eclipse.jetty jetty-plus + compile org.eclipse.jetty jetty-security + compile org.eclipse.jetty jetty-util + compile org.eclipse.jetty jetty-server + compile - - com.google.guava - guava + org.eclipse.jetty + jetty-http + compile + + + org.eclipse.jetty + jetty-continuation + compile + + + org.eclipse.jetty + jetty-servlet compile + org.apache.commons commons-lang3 @@ -85,8 +140,6 @@ org.apache.commons commons-math3 - 3.3 - test com.google.code.findbugs @@ -125,12 +178,8 @@ lz4 - com.twitter - chill_${scala.binary.version} - - - com.twitter - chill-java + org.roaringbitmap + RoaringBitmap commons-net @@ -158,10 +207,6 @@ json4s-jackson_${scala.binary.version} 3.2.10 - - colt - colt - org.apache.mesos mesos @@ -176,19 +221,19 @@ stream - com.codahale.metrics + io.dropwizard.metrics metrics-core - com.codahale.metrics + io.dropwizard.metrics metrics-jvm - com.codahale.metrics + io.dropwizard.metrics metrics-json - com.codahale.metrics + io.dropwizard.metrics metrics-graphite @@ -196,6 +241,17 @@ derby test + + org.apache.ivy + ivy + ${ivy.version} + + + oro + + oro + ${oro.version} + org.tachyonproject tachyon-client @@ -244,8 +300,8 @@ - org.scalatest - scalatest_${scala.binary.version} + org.seleniumhq.selenium + selenium-java test @@ -293,17 +349,6 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - org.scalatest - scalatest-maven-plugin - - - ${basedir}/.. - 1 - ${spark.classpath} - - - org.apache.maven.plugins @@ -317,9 +362,9 @@ - + - + @@ -333,59 +378,28 @@ true - - org.apache.maven.plugins - maven-shade-plugin - - - package - - shade - - - false - - - com.google.guava:guava - - - - - - com.google.guava:guava - - com/google/common/base/Absent* - com/google/common/base/Optional* - com/google/common/base/Present* - - - - - - - - org.apache.maven.plugins maven-dependency-plugin + copy-dependencies package copy-dependencies - + ${project.build.directory} false false true true - guava + + guava,jetty-io,jetty-servlet,jetty-continuation,jetty-http,jetty-plus,jetty-util,jetty-server + true @@ -411,4 +425,5 @@ + diff --git a/core/src/main/java/org/apache/spark/JavaSparkListener.java b/core/src/main/java/org/apache/spark/JavaSparkListener.java new file mode 100644 index 0000000000000..646496f313507 --- /dev/null +++ b/core/src/main/java/org/apache/spark/JavaSparkListener.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import org.apache.spark.scheduler.SparkListener; +import org.apache.spark.scheduler.SparkListenerApplicationEnd; +import org.apache.spark.scheduler.SparkListenerApplicationStart; +import org.apache.spark.scheduler.SparkListenerBlockManagerAdded; +import org.apache.spark.scheduler.SparkListenerBlockManagerRemoved; +import org.apache.spark.scheduler.SparkListenerEnvironmentUpdate; +import org.apache.spark.scheduler.SparkListenerExecutorAdded; +import org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate; +import org.apache.spark.scheduler.SparkListenerExecutorRemoved; +import org.apache.spark.scheduler.SparkListenerJobEnd; +import org.apache.spark.scheduler.SparkListenerJobStart; +import org.apache.spark.scheduler.SparkListenerStageCompleted; +import org.apache.spark.scheduler.SparkListenerStageSubmitted; +import org.apache.spark.scheduler.SparkListenerTaskEnd; +import org.apache.spark.scheduler.SparkListenerTaskGettingResult; +import org.apache.spark.scheduler.SparkListenerTaskStart; +import org.apache.spark.scheduler.SparkListenerUnpersistRDD; + +/** + * Java clients should extend this class instead of implementing + * SparkListener directly. This is to prevent java clients + * from breaking when new events are added to the SparkListener + * trait. + * + * This is a concrete class instead of abstract to enforce + * new events get added to both the SparkListener and this adapter + * in lockstep. 
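As an illustration of the adapter described above, a Java client would extend JavaSparkListener and override only the callbacks it cares about; every other callback keeps the empty default, so new SparkListener events cannot break the subclass. A minimal sketch (the class and counter names are illustrative), registered through the usual SparkContext.addSparkListener call (e.g. javaSparkContext.sc().addSparkListener(...)):

import java.util.concurrent.atomic.AtomicLong;

import org.apache.spark.JavaSparkListener;
import org.apache.spark.scheduler.SparkListenerJobEnd;
import org.apache.spark.scheduler.SparkListenerTaskEnd;

// Minimal sketch: only the events of interest are overridden; all other
// callbacks keep the empty defaults inherited from JavaSparkListener.
public class JobAndTaskCounter extends JavaSparkListener {
  private final AtomicLong tasksEnded = new AtomicLong();
  private final AtomicLong jobsEnded = new AtomicLong();

  @Override
  public void onTaskEnd(SparkListenerTaskEnd taskEnd) {
    tasksEnded.incrementAndGet();
  }

  @Override
  public void onJobEnd(SparkListenerJobEnd jobEnd) {
    jobsEnded.incrementAndGet();
  }

  public long tasksEnded() { return tasksEnded.get(); }
  public long jobsEnded() { return jobsEnded.get(); }
}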
+ */ +public class JavaSparkListener implements SparkListener { + + @Override + public void onStageCompleted(SparkListenerStageCompleted stageCompleted) { } + + @Override + public void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) { } + + @Override + public void onTaskStart(SparkListenerTaskStart taskStart) { } + + @Override + public void onTaskGettingResult(SparkListenerTaskGettingResult taskGettingResult) { } + + @Override + public void onTaskEnd(SparkListenerTaskEnd taskEnd) { } + + @Override + public void onJobStart(SparkListenerJobStart jobStart) { } + + @Override + public void onJobEnd(SparkListenerJobEnd jobEnd) { } + + @Override + public void onEnvironmentUpdate(SparkListenerEnvironmentUpdate environmentUpdate) { } + + @Override + public void onBlockManagerAdded(SparkListenerBlockManagerAdded blockManagerAdded) { } + + @Override + public void onBlockManagerRemoved(SparkListenerBlockManagerRemoved blockManagerRemoved) { } + + @Override + public void onUnpersistRDD(SparkListenerUnpersistRDD unpersistRDD) { } + + @Override + public void onApplicationStart(SparkListenerApplicationStart applicationStart) { } + + @Override + public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { } + + @Override + public void onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate executorMetricsUpdate) { } + + @Override + public void onExecutorAdded(SparkListenerExecutorAdded executorAdded) { } + + @Override + public void onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) { } +} diff --git a/core/src/main/java/org/apache/spark/JobExecutionStatus.java b/core/src/main/java/org/apache/spark/JobExecutionStatus.java new file mode 100644 index 0000000000000..6e161313702bb --- /dev/null +++ b/core/src/main/java/org/apache/spark/JobExecutionStatus.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +public enum JobExecutionStatus { + RUNNING, + SUCCEEDED, + FAILED, + UNKNOWN +} diff --git a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java new file mode 100644 index 0000000000000..fbc5666959055 --- /dev/null +++ b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import org.apache.spark.scheduler.*; + +/** + * Class that allows users to receive all SparkListener events. + * Users should override the onEvent method. + * + * This is a concrete Java class in order to ensure that we don't forget to update it when adding + * new methods to SparkListener: forgetting to add a method will result in a compilation error (if + * this was a concrete Scala class, default implementations of new event handlers would be inherited + * from the SparkListener trait). + */ +public class SparkFirehoseListener implements SparkListener { + + public void onEvent(SparkListenerEvent event) { } + + @Override + public final void onStageCompleted(SparkListenerStageCompleted stageCompleted) { + onEvent(stageCompleted); + } + + @Override + public final void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) { + onEvent(stageSubmitted); + } + + @Override + public final void onTaskStart(SparkListenerTaskStart taskStart) { + onEvent(taskStart); + } + + @Override + public final void onTaskGettingResult(SparkListenerTaskGettingResult taskGettingResult) { + onEvent(taskGettingResult); + } + + @Override + public final void onTaskEnd(SparkListenerTaskEnd taskEnd) { + onEvent(taskEnd); + } + + @Override + public final void onJobStart(SparkListenerJobStart jobStart) { + onEvent(jobStart); + } + + @Override + public final void onJobEnd(SparkListenerJobEnd jobEnd) { + onEvent(jobEnd); + } + + @Override + public final void onEnvironmentUpdate(SparkListenerEnvironmentUpdate environmentUpdate) { + onEvent(environmentUpdate); + } + + @Override + public final void onBlockManagerAdded(SparkListenerBlockManagerAdded blockManagerAdded) { + onEvent(blockManagerAdded); + } + + @Override + public final void onBlockManagerRemoved(SparkListenerBlockManagerRemoved blockManagerRemoved) { + onEvent(blockManagerRemoved); + } + + @Override + public final void onUnpersistRDD(SparkListenerUnpersistRDD unpersistRDD) { + onEvent(unpersistRDD); + } + + @Override + public final void onApplicationStart(SparkListenerApplicationStart applicationStart) { + onEvent(applicationStart); + } + + @Override + public final void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { + onEvent(applicationEnd); + } + + @Override + public final void onExecutorMetricsUpdate( + SparkListenerExecutorMetricsUpdate executorMetricsUpdate) { + onEvent(executorMetricsUpdate); + } + + @Override + public final void onExecutorAdded(SparkListenerExecutorAdded executorAdded) { + onEvent(executorAdded); + } + + @Override + public final void onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) { + onEvent(executorRemoved); + } +} diff --git a/core/src/main/java/org/apache/spark/SparkJobInfo.java b/core/src/main/java/org/apache/spark/SparkJobInfo.java new file mode 100644 index 0000000000000..e31c4401632a6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/SparkJobInfo.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
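For the SparkFirehoseListener introduced above, client code overrides the single onEvent method and receives every listener event through it, since all of the individual on*() callbacks are final and forward to onEvent. A minimal sketch (the class name and logging are illustrative):

import org.apache.spark.SparkFirehoseListener;
import org.apache.spark.scheduler.SparkListenerEvent;

// Minimal sketch: every SparkListener event arrives through the single
// onEvent callback, regardless of its concrete type.
public class EventLoggingFirehose extends SparkFirehoseListener {
  @Override
  public void onEvent(SparkListenerEvent event) {
    System.out.println("Spark event: " + event.getClass().getSimpleName());
  }
}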
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import java.io.Serializable; + +/** + * Exposes information about Spark Jobs. + * + * This interface is not designed to be implemented outside of Spark. We may add additional methods + * which may break binary compatibility with outside implementations. + */ +public interface SparkJobInfo extends Serializable { + int jobId(); + int[] stageIds(); + JobExecutionStatus status(); +} diff --git a/core/src/main/java/org/apache/spark/SparkStageInfo.java b/core/src/main/java/org/apache/spark/SparkStageInfo.java new file mode 100644 index 0000000000000..b7d462abd72d6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/SparkStageInfo.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import java.io.Serializable; + +/** + * Exposes information about Spark Stages. + * + * This interface is not designed to be implemented outside of Spark. We may add additional methods + * which may break binary compatibility with outside implementations. + */ +public interface SparkStageInfo extends Serializable { + int stageId(); + int currentAttemptId(); + long submissionTime(); + String name(); + int numTasks(); + int numActiveTasks(); + int numCompletedTasks(); + int numFailedTasks(); +} diff --git a/core/src/main/java/org/apache/spark/TaskContext.java b/core/src/main/java/org/apache/spark/TaskContext.java deleted file mode 100644 index 2d998d4c7a5d9..0000000000000 --- a/core/src/main/java/org/apache/spark/TaskContext.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
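Since SparkJobInfo and SparkStageInfo above are plain read-only interfaces, client code can report progress without touching Spark internals. A minimal sketch that uses only the declared accessors (the helper class and method names are illustrative):

import org.apache.spark.SparkJobInfo;
import org.apache.spark.SparkStageInfo;

// Minimal sketch: summarize job/stage progress using only the accessor
// methods declared by the interfaces added above.
public final class ProgressFormatter {
  private ProgressFormatter() { }

  public static String describe(SparkJobInfo job) {
    return "job " + job.jobId() + " [" + job.status() + "] with "
        + job.stageIds().length + " stage(s)";
  }

  public static String describe(SparkStageInfo stage) {
    return "stage " + stage.stageId() + " (attempt " + stage.currentAttemptId() + "): "
        + stage.numCompletedTasks() + "/" + stage.numTasks() + " tasks complete, "
        + stage.numActiveTasks() + " active, " + stage.numFailedTasks() + " failed";
  }
}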
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark; - -import java.io.Serializable; - -import scala.Function0; -import scala.Function1; -import scala.Unit; - -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.executor.TaskMetrics; -import org.apache.spark.util.TaskCompletionListener; - -/** - * Contextual information about a task which can be read or mutated during - * execution. To access the TaskContext for a running task use - * TaskContext.get(). - */ -public abstract class TaskContext implements Serializable { - /** - * Return the currently active TaskContext. This can be called inside of - * user functions to access contextual information about running tasks. - */ - public static TaskContext get() { - return taskContext.get(); - } - - private static ThreadLocal taskContext = - new ThreadLocal(); - - static void setTaskContext(TaskContext tc) { - taskContext.set(tc); - } - - static void unset() { - taskContext.remove(); - } - - /** - * Whether the task has completed. - */ - public abstract boolean isCompleted(); - - /** - * Whether the task has been killed. - */ - public abstract boolean isInterrupted(); - - /** @deprecated: use isRunningLocally() */ - @Deprecated - public abstract boolean runningLocally(); - - public abstract boolean isRunningLocally(); - - /** - * Add a (Java friendly) listener to be executed on task completion. - * This will be called in all situation - success, failure, or cancellation. - *


- * An example use is for HadoopRDD to register a callback to close the input stream. - */ - public abstract TaskContext addTaskCompletionListener(TaskCompletionListener listener); - - /** - * Add a listener in the form of a Scala closure to be executed on task completion. - * This will be called in all situations - success, failure, or cancellation. - *


- * An example use is for HadoopRDD to register a callback to close the input stream. - */ - public abstract TaskContext addTaskCompletionListener(final Function1 f); - - /** - * Add a callback function to be executed on task completion. An example use - * is for HadoopRDD to register a callback to close the input stream. - * Will be called in any situation - success, failure, or cancellation. - * - * @deprecated: use addTaskCompletionListener - * - * @param f Callback function. - */ - @Deprecated - public abstract void addOnCompleteCallback(final Function0 f); - - public abstract int stageId(); - - public abstract int partitionId(); - - public abstract long attemptId(); - - /** ::DeveloperApi:: */ - @DeveloperApi - public abstract TaskMetrics taskMetrics(); -} diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java index abd9bcc07ac61..99bf240a17225 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java @@ -22,7 +22,8 @@ import scala.Tuple2; /** - * A function that returns key-value pairs (Tuple2), and can be used to construct PairRDDs. + * A function that returns key-value pairs (Tuple2<K, V>), and can be used to + * construct PairRDDs. */ public interface PairFunction extends Serializable { public Tuple2 call(T t) throws Exception; diff --git a/core/src/main/java/org/apache/spark/api/java/function/package.scala b/core/src/main/java/org/apache/spark/api/java/function/package.scala index 7f91de653a64a..0f9bac7164162 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/package.scala +++ b/core/src/main/java/org/apache/spark/api/java/function/package.scala @@ -22,4 +22,4 @@ package org.apache.spark.api.java * these interfaces to pass functions to various Java API methods for Spark. Please visit Spark's * Java programming guide for more details. */ -package object function \ No newline at end of file +package object function diff --git a/core/src/main/java/org/apache/spark/util/collection/Sorter.java b/core/src/main/java/org/apache/spark/util/collection/Sorter.java deleted file mode 100644 index 64ad18c0e463a..0000000000000 --- a/core/src/main/java/org/apache/spark/util/collection/Sorter.java +++ /dev/null @@ -1,915 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util.collection; - -import java.util.Comparator; - -/** - * A port of the Android Timsort class, which utilizes a "stable, adaptive, iterative mergesort." - * See the method comment on sort() for more details. 
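The removed TaskContext.java above documents the task-scoped listener API (TaskContext.get() plus addTaskCompletionListener), with HadoopRDD closing its input stream as the motivating example. Assuming those methods remain available from the relocated implementation, the pattern looks like this sketch (the helper class and file-reading code are purely illustrative):

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

// Minimal sketch: open a per-task resource and register a completion listener
// so it is closed on success, failure, or cancellation -- the same pattern the
// javadoc above describes for HadoopRDD closing its input stream.
public final class TaskScopedReader {
  private TaskScopedReader() { }

  public static BufferedReader open(String path) throws IOException {
    final BufferedReader reader = new BufferedReader(new FileReader(path));
    TaskContext context = TaskContext.get();   // context of the running task, if any
    if (context != null) {
      context.addTaskCompletionListener(new TaskCompletionListener() {
        @Override
        public void onTaskCompletion(TaskContext ctx) {
          try {
            reader.close();
          } catch (IOException e) {
            // best-effort cleanup; ignore close failures
          }
        }
      });
    }
    return reader;
  }
}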
- * - * This has been kept in Java with the original style in order to match very closely with the - * Anroid source code, and thus be easy to verify correctness. - * - * The purpose of the port is to generalize the interface to the sort to accept input data formats - * besides simple arrays where every element is sorted individually. For instance, the AppendOnlyMap - * uses this to sort an Array with alternating elements of the form [key, value, key, value]. - * This generalization comes with minimal overhead -- see SortDataFormat for more information. - */ -class Sorter { - - /** - * This is the minimum sized sequence that will be merged. Shorter - * sequences will be lengthened by calling binarySort. If the entire - * array is less than this length, no merges will be performed. - * - * This constant should be a power of two. It was 64 in Tim Peter's C - * implementation, but 32 was empirically determined to work better in - * this implementation. In the unlikely event that you set this constant - * to be a number that's not a power of two, you'll need to change the - * minRunLength computation. - * - * If you decrease this constant, you must change the stackLen - * computation in the TimSort constructor, or you risk an - * ArrayOutOfBounds exception. See listsort.txt for a discussion - * of the minimum stack length required as a function of the length - * of the array being sorted and the minimum merge sequence length. - */ - private static final int MIN_MERGE = 32; - - private final SortDataFormat s; - - public Sorter(SortDataFormat sortDataFormat) { - this.s = sortDataFormat; - } - - /** - * A stable, adaptive, iterative mergesort that requires far fewer than - * n lg(n) comparisons when running on partially sorted arrays, while - * offering performance comparable to a traditional mergesort when run - * on random arrays. Like all proper mergesorts, this sort is stable and - * runs O(n log n) time (worst case). In the worst case, this sort requires - * temporary storage space for n/2 object references; in the best case, - * it requires only a small constant amount of space. - * - * This implementation was adapted from Tim Peters's list sort for - * Python, which is described in detail here: - * - * http://svn.python.org/projects/python/trunk/Objects/listsort.txt - * - * Tim's C code may be found here: - * - * http://svn.python.org/projects/python/trunk/Objects/listobject.c - * - * The underlying techniques are described in this paper (and may have - * even earlier origins): - * - * "Optimistic Sorting and Information Theoretic Complexity" - * Peter McIlroy - * SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms), - * pp 467-474, Austin, Texas, 25-27 January 1993. - * - * While the API to this class consists solely of static methods, it is - * (privately) instantiable; a TimSort instance holds the state of an ongoing - * sort, assuming the input array is large enough to warrant the full-blown - * TimSort. Small arrays are sorted in place, using a binary insertion sort. 
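To make the "[key, value, key, value]" layout mentioned in the class comment above concrete: the generalized sort has to move each key/value pair as a two-slot unit rather than as individual elements. A small standalone illustration of that idea (this is not Spark's SortDataFormat, just the concept):

import java.util.Arrays;

// Standalone illustration of sorting a flat [key, value, key, value, ...]
// array by key: every move shifts a two-slot unit, which is the kind of data
// layout the generalized sort interface above is designed to support.
public final class PairArraySortDemo {
  static void insertionSortPairs(long[] kv) {
    for (int i = 2; i < kv.length; i += 2) {
      long key = kv[i];
      long value = kv[i + 1];
      int j = i - 2;
      while (j >= 0 && kv[j] > key) {   // strict > keeps equal keys stable
        kv[j + 2] = kv[j];
        kv[j + 3] = kv[j + 1];
        j -= 2;
      }
      kv[j + 2] = key;
      kv[j + 3] = value;
    }
  }

  public static void main(String[] args) {
    long[] kv = {30, 300, 10, 100, 20, 200};   // keys 30, 10, 20 with their values
    insertionSortPairs(kv);
    System.out.println(Arrays.toString(kv));   // [10, 100, 20, 200, 30, 300]
  }
}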
- * - * @author Josh Bloch - */ - void sort(Buffer a, int lo, int hi, Comparator c) { - assert c != null; - - int nRemaining = hi - lo; - if (nRemaining < 2) - return; // Arrays of size 0 and 1 are always sorted - - // If array is small, do a "mini-TimSort" with no merges - if (nRemaining < MIN_MERGE) { - int initRunLen = countRunAndMakeAscending(a, lo, hi, c); - binarySort(a, lo, hi, lo + initRunLen, c); - return; - } - - /** - * March over the array once, left to right, finding natural runs, - * extending short natural runs to minRun elements, and merging runs - * to maintain stack invariant. - */ - SortState sortState = new SortState(a, c, hi - lo); - int minRun = minRunLength(nRemaining); - do { - // Identify next run - int runLen = countRunAndMakeAscending(a, lo, hi, c); - - // If run is short, extend to min(minRun, nRemaining) - if (runLen < minRun) { - int force = nRemaining <= minRun ? nRemaining : minRun; - binarySort(a, lo, lo + force, lo + runLen, c); - runLen = force; - } - - // Push run onto pending-run stack, and maybe merge - sortState.pushRun(lo, runLen); - sortState.mergeCollapse(); - - // Advance to find next run - lo += runLen; - nRemaining -= runLen; - } while (nRemaining != 0); - - // Merge all remaining runs to complete sort - assert lo == hi; - sortState.mergeForceCollapse(); - assert sortState.stackSize == 1; - } - - /** - * Sorts the specified portion of the specified array using a binary - * insertion sort. This is the best method for sorting small numbers - * of elements. It requires O(n log n) compares, but O(n^2) data - * movement (worst case). - * - * If the initial part of the specified range is already sorted, - * this method can take advantage of it: the method assumes that the - * elements from index {@code lo}, inclusive, to {@code start}, - * exclusive are already sorted. - * - * @param a the array in which a range is to be sorted - * @param lo the index of the first element in the range to be sorted - * @param hi the index after the last element in the range to be sorted - * @param start the index of the first element in the range that is - * not already known to be sorted ({@code lo <= start <= hi}) - * @param c comparator to used for the sort - */ - @SuppressWarnings("fallthrough") - private void binarySort(Buffer a, int lo, int hi, int start, Comparator c) { - assert lo <= start && start <= hi; - if (start == lo) - start++; - - Buffer pivotStore = s.allocate(1); - for ( ; start < hi; start++) { - s.copyElement(a, start, pivotStore, 0); - K pivot = s.getKey(pivotStore, 0); - - // Set left (and right) to the index where a[start] (pivot) belongs - int left = lo; - int right = start; - assert left <= right; - /* - * Invariants: - * pivot >= all in [lo, left). - * pivot < all in [right, start). - */ - while (left < right) { - int mid = (left + right) >>> 1; - if (c.compare(pivot, s.getKey(a, mid)) < 0) - right = mid; - else - left = mid + 1; - } - assert left == right; - - /* - * The invariants still hold: pivot >= all in [lo, left) and - * pivot < all in [left, start), so pivot belongs at left. Note - * that if there are elements equal to pivot, left points to the - * first slot after them -- that's why this sort is stable. - * Slide elements over to make room for pivot. 
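The binarySort contract described above -- [lo, start) already sorted, each remaining element placed by binary search with the larger elements slid one slot right -- can be shown with a plain int[] version (simplified; names are illustrative):

import java.util.Arrays;

// Plain-array illustration of the binarySort contract above: [lo, start) is
// already sorted, each subsequent element is located by binary search and the
// larger elements are slid one slot to the right to make room for it.
public final class BinaryInsertionSortDemo {
  static void binarySort(int[] a, int lo, int hi, int start) {
    if (start == lo) start++;
    for (; start < hi; start++) {
      int pivot = a[start];
      int left = lo, right = start;
      while (left < right) {
        int mid = (left + right) >>> 1;
        if (pivot < a[mid]) right = mid; else left = mid + 1;
      }
      int n = start - left;                    // number of elements to slide right
      System.arraycopy(a, left, a, left + 1, n);
      a[left] = pivot;
    }
  }

  public static void main(String[] args) {
    int[] a = {1, 4, 9, 3, 7, 2};
    binarySort(a, 0, a.length, 3);             // first three elements already sorted
    System.out.println(Arrays.toString(a));    // [1, 2, 3, 4, 7, 9]
  }
}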
- */ - int n = start - left; // The number of elements to move - // Switch is just an optimization for arraycopy in default case - switch (n) { - case 2: s.copyElement(a, left + 1, a, left + 2); - case 1: s.copyElement(a, left, a, left + 1); - break; - default: s.copyRange(a, left, a, left + 1, n); - } - s.copyElement(pivotStore, 0, a, left); - } - } - - /** - * Returns the length of the run beginning at the specified position in - * the specified array and reverses the run if it is descending (ensuring - * that the run will always be ascending when the method returns). - * - * A run is the longest ascending sequence with: - * - * a[lo] <= a[lo + 1] <= a[lo + 2] <= ... - * - * or the longest descending sequence with: - * - * a[lo] > a[lo + 1] > a[lo + 2] > ... - * - * For its intended use in a stable mergesort, the strictness of the - * definition of "descending" is needed so that the call can safely - * reverse a descending sequence without violating stability. - * - * @param a the array in which a run is to be counted and possibly reversed - * @param lo index of the first element in the run - * @param hi index after the last element that may be contained in the run. - It is required that {@code lo < hi}. - * @param c the comparator to used for the sort - * @return the length of the run beginning at the specified position in - * the specified array - */ - private int countRunAndMakeAscending(Buffer a, int lo, int hi, Comparator c) { - assert lo < hi; - int runHi = lo + 1; - if (runHi == hi) - return 1; - - // Find end of run, and reverse range if descending - if (c.compare(s.getKey(a, runHi++), s.getKey(a, lo)) < 0) { // Descending - while (runHi < hi && c.compare(s.getKey(a, runHi), s.getKey(a, runHi - 1)) < 0) - runHi++; - reverseRange(a, lo, runHi); - } else { // Ascending - while (runHi < hi && c.compare(s.getKey(a, runHi), s.getKey(a, runHi - 1)) >= 0) - runHi++; - } - - return runHi - lo; - } - - /** - * Reverse the specified range of the specified array. - * - * @param a the array in which a range is to be reversed - * @param lo the index of the first element in the range to be reversed - * @param hi the index after the last element in the range to be reversed - */ - private void reverseRange(Buffer a, int lo, int hi) { - hi--; - while (lo < hi) { - s.swap(a, lo, hi); - lo++; - hi--; - } - } - - /** - * Returns the minimum acceptable run length for an array of the specified - * length. Natural runs shorter than this will be extended with - * {@link #binarySort}. - * - * Roughly speaking, the computation is: - * - * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MIN_MERGE/2. - * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k - * is close to, but strictly less than, an exact power of 2. - * - * For the rationale, see listsort.txt. - * - * @param n the length of the array to be sorted - * @return the length of the minimum run to be merged - */ - private int minRunLength(int n) { - assert n >= 0; - int r = 0; // Becomes 1 if any 1 bits are shifted off - while (n >= MIN_MERGE) { - r |= (n & 1); - n >>= 1; - } - return n + r; - } - - private class SortState { - - /** - * The Buffer being sorted. - */ - private final Buffer a; - - /** - * Length of the sort Buffer. - */ - private final int aLength; - - /** - * The comparator for this sort. - */ - private final Comparator c; - - /** - * When we get into galloping mode, we stay there until both runs win less - * often than MIN_GALLOP consecutive times. 
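The minRunLength rule quoted above (return n for small arrays, otherwise a value between MIN_MERGE/2 and MIN_MERGE such that n divided by it falls just under a power of two) is easy to sanity-check with a few sample lengths:

// Quick check of the minRunLength computation described above, using the same
// MIN_MERGE = 32 constant: shift n right until it is below MIN_MERGE, OR-ing
// any shifted-off 1 bits into r, and return n + r.
public final class MinRunLengthDemo {
  private static final int MIN_MERGE = 32;

  static int minRunLength(int n) {
    int r = 0;                    // becomes 1 if any 1 bits are shifted off
    while (n >= MIN_MERGE) {
      r |= (n & 1);
      n >>= 1;
    }
    return n + r;
  }

  public static void main(String[] args) {
    System.out.println(minRunLength(20));     // 20  (n < MIN_MERGE: returned unchanged)
    System.out.println(minRunLength(1024));   // 16  (exact power of two: MIN_MERGE / 2)
    System.out.println(minRunLength(1000));   // 32  (1000 / 32 = 31.25, just under 32)
  }
}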
- */ - private static final int MIN_GALLOP = 7; - - /** - * This controls when we get *into* galloping mode. It is initialized - * to MIN_GALLOP. The mergeLo and mergeHi methods nudge it higher for - * random data, and lower for highly structured data. - */ - private int minGallop = MIN_GALLOP; - - /** - * Maximum initial size of tmp array, which is used for merging. The array - * can grow to accommodate demand. - * - * Unlike Tim's original C version, we do not allocate this much storage - * when sorting smaller arrays. This change was required for performance. - */ - private static final int INITIAL_TMP_STORAGE_LENGTH = 256; - - /** - * Temp storage for merges. - */ - private Buffer tmp; // Actual runtime type will be Object[], regardless of T - - /** - * Length of the temp storage. - */ - private int tmpLength = 0; - - /** - * A stack of pending runs yet to be merged. Run i starts at - * address base[i] and extends for len[i] elements. It's always - * true (so long as the indices are in bounds) that: - * - * runBase[i] + runLen[i] == runBase[i + 1] - * - * so we could cut the storage for this, but it's a minor amount, - * and keeping all the info explicit simplifies the code. - */ - private int stackSize = 0; // Number of pending runs on stack - private final int[] runBase; - private final int[] runLen; - - /** - * Creates a TimSort instance to maintain the state of an ongoing sort. - * - * @param a the array to be sorted - * @param c the comparator to determine the order of the sort - */ - private SortState(Buffer a, Comparator c, int len) { - this.aLength = len; - this.a = a; - this.c = c; - - // Allocate temp storage (which may be increased later if necessary) - tmpLength = len < 2 * INITIAL_TMP_STORAGE_LENGTH ? len >>> 1 : INITIAL_TMP_STORAGE_LENGTH; - tmp = s.allocate(tmpLength); - - /* - * Allocate runs-to-be-merged stack (which cannot be expanded). The - * stack length requirements are described in listsort.txt. The C - * version always uses the same stack length (85), but this was - * measured to be too expensive when sorting "mid-sized" arrays (e.g., - * 100 elements) in Java. Therefore, we use smaller (but sufficiently - * large) stack lengths for smaller arrays. The "magic numbers" in the - * computation below must be changed if MIN_MERGE is decreased. See - * the MIN_MERGE declaration above for more information. - */ - int stackLen = (len < 120 ? 5 : - len < 1542 ? 10 : - len < 119151 ? 19 : 40); - runBase = new int[stackLen]; - runLen = new int[stackLen]; - } - - /** - * Pushes the specified run onto the pending-run stack. - * - * @param runBase index of the first element in the run - * @param runLen the number of elements in the run - */ - private void pushRun(int runBase, int runLen) { - this.runBase[stackSize] = runBase; - this.runLen[stackSize] = runLen; - stackSize++; - } - - /** - * Examines the stack of runs waiting to be merged and merges adjacent runs - * until the stack invariants are reestablished: - * - * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] - * 2. runLen[i - 2] > runLen[i - 1] - * - * This method is called each time a new run is pushed onto the stack, - * so the invariants are guaranteed to hold for i < stackSize upon - * entry to the method. 
- */ - private void mergeCollapse() { - while (stackSize > 1) { - int n = stackSize - 2; - if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) { - if (runLen[n - 1] < runLen[n + 1]) - n--; - mergeAt(n); - } else if (runLen[n] <= runLen[n + 1]) { - mergeAt(n); - } else { - break; // Invariant is established - } - } - } - - /** - * Merges all runs on the stack until only one remains. This method is - * called once, to complete the sort. - */ - private void mergeForceCollapse() { - while (stackSize > 1) { - int n = stackSize - 2; - if (n > 0 && runLen[n - 1] < runLen[n + 1]) - n--; - mergeAt(n); - } - } - - /** - * Merges the two runs at stack indices i and i+1. Run i must be - * the penultimate or antepenultimate run on the stack. In other words, - * i must be equal to stackSize-2 or stackSize-3. - * - * @param i stack index of the first of the two runs to merge - */ - private void mergeAt(int i) { - assert stackSize >= 2; - assert i >= 0; - assert i == stackSize - 2 || i == stackSize - 3; - - int base1 = runBase[i]; - int len1 = runLen[i]; - int base2 = runBase[i + 1]; - int len2 = runLen[i + 1]; - assert len1 > 0 && len2 > 0; - assert base1 + len1 == base2; - - /* - * Record the length of the combined runs; if i is the 3rd-last - * run now, also slide over the last run (which isn't involved - * in this merge). The current run (i+1) goes away in any case. - */ - runLen[i] = len1 + len2; - if (i == stackSize - 3) { - runBase[i + 1] = runBase[i + 2]; - runLen[i + 1] = runLen[i + 2]; - } - stackSize--; - - /* - * Find where the first element of run2 goes in run1. Prior elements - * in run1 can be ignored (because they're already in place). - */ - int k = gallopRight(s.getKey(a, base2), a, base1, len1, 0, c); - assert k >= 0; - base1 += k; - len1 -= k; - if (len1 == 0) - return; - - /* - * Find where the last element of run1 goes in run2. Subsequent elements - * in run2 can be ignored (because they're already in place). - */ - len2 = gallopLeft(s.getKey(a, base1 + len1 - 1), a, base2, len2, len2 - 1, c); - assert len2 >= 0; - if (len2 == 0) - return; - - // Merge remaining runs, using tmp array with min(len1, len2) elements - if (len1 <= len2) - mergeLo(base1, len1, base2, len2); - else - mergeHi(base1, len1, base2, len2); - } - - /** - * Locates the position at which to insert the specified key into the - * specified sorted range; if the range contains an element equal to key, - * returns the index of the leftmost equal element. - * - * @param key the key whose insertion point to search for - * @param a the array in which to search - * @param base the index of the first element in the range - * @param len the length of the range; must be > 0 - * @param hint the index at which to begin the search, 0 <= hint < n. - * The closer hint is to the result, the faster this method will run. - * @param c the comparator used to order the range, and to search - * @return the int k, 0 <= k <= n such that a[b + k - 1] < key <= a[b + k], - * pretending that a[b - 1] is minus infinity and a[b + n] is infinity. - * In other words, key belongs at index b + k; or in other words, - * the first k elements of a should precede key, and the last n - k - * should follow it. 
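The gallopLeft contract just described (find k with a[base+k-1] < key <= a[base+k] by galloping exponentially and then binary-searching the bracketed range) can be illustrated with a simplified plain-array version that always starts from the left end instead of a hint:

// Simplified illustration of the "gallop then binary search" idea described
// above: offsets grow as 1, 3, 7, 15, ... until the key is bracketed, then a
// binary search pins down the leftmost insertion point. (The real gallopLeft
// also starts from a hint position and can gallop to the left of it.)
public final class GallopLeftDemo {
  static int gallopLeft(int key, int[] a, int base, int len) {
    if (len == 0 || key <= a[base]) return 0;
    // Now a[base] < key: gallop until a[base+lastOfs] < key <= a[base+ofs] (or ofs == len).
    int lastOfs = 0;
    int ofs = 1;
    while (ofs < len && a[base + ofs] < key) {
      lastOfs = ofs;
      ofs = (ofs << 1) + 1;                    // 1, 3, 7, 15, ...
      if (ofs <= 0) ofs = len;                 // int overflow
    }
    if (ofs > len) ofs = len;

    // Binary search keeping the invariant a[base+lastOfs-1] < key <= a[base+ofs].
    lastOfs++;
    while (lastOfs < ofs) {
      int m = lastOfs + ((ofs - lastOfs) >>> 1);
      if (a[base + m] < key) lastOfs = m + 1; else ofs = m;
    }
    return ofs;   // first index whose element is >= key
  }

  public static void main(String[] args) {
    int[] run = {2, 4, 4, 6, 8, 10, 12, 14};
    System.out.println(gallopLeft(4, run, 0, run.length));   // 1 (leftmost 4)
    System.out.println(gallopLeft(9, run, 0, run.length));   // 5 (between 8 and 10)
  }
}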
- */ - private int gallopLeft(K key, Buffer a, int base, int len, int hint, Comparator c) { - assert len > 0 && hint >= 0 && hint < len; - int lastOfs = 0; - int ofs = 1; - if (c.compare(key, s.getKey(a, base + hint)) > 0) { - // Gallop right until a[base+hint+lastOfs] < key <= a[base+hint+ofs] - int maxOfs = len - hint; - while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint + ofs)) > 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) // int overflow - ofs = maxOfs; - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - lastOfs += hint; - ofs += hint; - } else { // key <= a[base + hint] - // Gallop left until a[base+hint-ofs] < key <= a[base+hint-lastOfs] - final int maxOfs = hint + 1; - while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint - ofs)) <= 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) // int overflow - ofs = maxOfs; - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to base - int tmp = lastOfs; - lastOfs = hint - ofs; - ofs = hint - tmp; - } - assert -1 <= lastOfs && lastOfs < ofs && ofs <= len; - - /* - * Now a[base+lastOfs] < key <= a[base+ofs], so key belongs somewhere - * to the right of lastOfs but no farther right than ofs. Do a binary - * search, with invariant a[base + lastOfs - 1] < key <= a[base + ofs]. - */ - lastOfs++; - while (lastOfs < ofs) { - int m = lastOfs + ((ofs - lastOfs) >>> 1); - - if (c.compare(key, s.getKey(a, base + m)) > 0) - lastOfs = m + 1; // a[base + m] < key - else - ofs = m; // key <= a[base + m] - } - assert lastOfs == ofs; // so a[base + ofs - 1] < key <= a[base + ofs] - return ofs; - } - - /** - * Like gallopLeft, except that if the range contains an element equal to - * key, gallopRight returns the index after the rightmost equal element. - * - * @param key the key whose insertion point to search for - * @param a the array in which to search - * @param base the index of the first element in the range - * @param len the length of the range; must be > 0 - * @param hint the index at which to begin the search, 0 <= hint < n. - * The closer hint is to the result, the faster this method will run. - * @param c the comparator used to order the range, and to search - * @return the int k, 0 <= k <= n such that a[b + k - 1] <= key < a[b + k] - */ - private int gallopRight(K key, Buffer a, int base, int len, int hint, Comparator c) { - assert len > 0 && hint >= 0 && hint < len; - - int ofs = 1; - int lastOfs = 0; - if (c.compare(key, s.getKey(a, base + hint)) < 0) { - // Gallop left until a[b+hint - ofs] <= key < a[b+hint - lastOfs] - int maxOfs = hint + 1; - while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint - ofs)) < 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) // int overflow - ofs = maxOfs; - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to b - int tmp = lastOfs; - lastOfs = hint - ofs; - ofs = hint - tmp; - } else { // a[b + hint] <= key - // Gallop right until a[b+hint + lastOfs] <= key < a[b+hint + ofs] - int maxOfs = len - hint; - while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint + ofs)) >= 0) { - lastOfs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) // int overflow - ofs = maxOfs; - } - if (ofs > maxOfs) - ofs = maxOfs; - - // Make offsets relative to b - lastOfs += hint; - ofs += hint; - } - assert -1 <= lastOfs && lastOfs < ofs && ofs <= len; - - /* - * Now a[b + lastOfs] <= key < a[b + ofs], so key belongs somewhere to - * the right of lastOfs but no farther right than ofs. 
Do a binary - * search, with invariant a[b + lastOfs - 1] <= key < a[b + ofs]. - */ - lastOfs++; - while (lastOfs < ofs) { - int m = lastOfs + ((ofs - lastOfs) >>> 1); - - if (c.compare(key, s.getKey(a, base + m)) < 0) - ofs = m; // key < a[b + m] - else - lastOfs = m + 1; // a[b + m] <= key - } - assert lastOfs == ofs; // so a[b + ofs - 1] <= key < a[b + ofs] - return ofs; - } - - /** - * Merges two adjacent runs in place, in a stable fashion. The first - * element of the first run must be greater than the first element of the - * second run (a[base1] > a[base2]), and the last element of the first run - * (a[base1 + len1-1]) must be greater than all elements of the second run. - * - * For performance, this method should be called only when len1 <= len2; - * its twin, mergeHi should be called if len1 >= len2. (Either method - * may be called if len1 == len2.) - * - * @param base1 index of first element in first run to be merged - * @param len1 length of first run to be merged (must be > 0) - * @param base2 index of first element in second run to be merged - * (must be aBase + aLen) - * @param len2 length of second run to be merged (must be > 0) - */ - private void mergeLo(int base1, int len1, int base2, int len2) { - assert len1 > 0 && len2 > 0 && base1 + len1 == base2; - - // Copy first run into temp array - Buffer a = this.a; // For performance - Buffer tmp = ensureCapacity(len1); - s.copyRange(a, base1, tmp, 0, len1); - - int cursor1 = 0; // Indexes into tmp array - int cursor2 = base2; // Indexes int a - int dest = base1; // Indexes int a - - // Move first element of second run and deal with degenerate cases - s.copyElement(a, cursor2++, a, dest++); - if (--len2 == 0) { - s.copyRange(tmp, cursor1, a, dest, len1); - return; - } - if (len1 == 1) { - s.copyRange(a, cursor2, a, dest, len2); - s.copyElement(tmp, cursor1, a, dest + len2); // Last elt of run 1 to end of merge - return; - } - - Comparator c = this.c; // Use local variable for performance - int minGallop = this.minGallop; // " " " " " - outer: - while (true) { - int count1 = 0; // Number of times in a row that first run won - int count2 = 0; // Number of times in a row that second run won - - /* - * Do the straightforward thing until (if ever) one run starts - * winning consistently. - */ - do { - assert len1 > 1 && len2 > 0; - if (c.compare(s.getKey(a, cursor2), s.getKey(tmp, cursor1)) < 0) { - s.copyElement(a, cursor2++, a, dest++); - count2++; - count1 = 0; - if (--len2 == 0) - break outer; - } else { - s.copyElement(tmp, cursor1++, a, dest++); - count1++; - count2 = 0; - if (--len1 == 1) - break outer; - } - } while ((count1 | count2) < minGallop); - - /* - * One run is winning so consistently that galloping may be a - * huge win. So try that, and continue galloping until (if ever) - * neither run appears to be winning consistently anymore. 
- */ - do { - assert len1 > 1 && len2 > 0; - count1 = gallopRight(s.getKey(a, cursor2), tmp, cursor1, len1, 0, c); - if (count1 != 0) { - s.copyRange(tmp, cursor1, a, dest, count1); - dest += count1; - cursor1 += count1; - len1 -= count1; - if (len1 <= 1) // len1 == 1 || len1 == 0 - break outer; - } - s.copyElement(a, cursor2++, a, dest++); - if (--len2 == 0) - break outer; - - count2 = gallopLeft(s.getKey(tmp, cursor1), a, cursor2, len2, 0, c); - if (count2 != 0) { - s.copyRange(a, cursor2, a, dest, count2); - dest += count2; - cursor2 += count2; - len2 -= count2; - if (len2 == 0) - break outer; - } - s.copyElement(tmp, cursor1++, a, dest++); - if (--len1 == 1) - break outer; - minGallop--; - } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP); - if (minGallop < 0) - minGallop = 0; - minGallop += 2; // Penalize for leaving gallop mode - } // End of "outer" loop - this.minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field - - if (len1 == 1) { - assert len2 > 0; - s.copyRange(a, cursor2, a, dest, len2); - s.copyElement(tmp, cursor1, a, dest + len2); // Last elt of run 1 to end of merge - } else if (len1 == 0) { - throw new IllegalArgumentException( - "Comparison method violates its general contract!"); - } else { - assert len2 == 0; - assert len1 > 1; - s.copyRange(tmp, cursor1, a, dest, len1); - } - } - - /** - * Like mergeLo, except that this method should be called only if - * len1 >= len2; mergeLo should be called if len1 <= len2. (Either method - * may be called if len1 == len2.) - * - * @param base1 index of first element in first run to be merged - * @param len1 length of first run to be merged (must be > 0) - * @param base2 index of first element in second run to be merged - * (must be aBase + aLen) - * @param len2 length of second run to be merged (must be > 0) - */ - private void mergeHi(int base1, int len1, int base2, int len2) { - assert len1 > 0 && len2 > 0 && base1 + len1 == base2; - - // Copy second run into temp array - Buffer a = this.a; // For performance - Buffer tmp = ensureCapacity(len2); - s.copyRange(a, base2, tmp, 0, len2); - - int cursor1 = base1 + len1 - 1; // Indexes into a - int cursor2 = len2 - 1; // Indexes into tmp array - int dest = base2 + len2 - 1; // Indexes into a - - // Move last element of first run and deal with degenerate cases - s.copyElement(a, cursor1--, a, dest--); - if (--len1 == 0) { - s.copyRange(tmp, 0, a, dest - (len2 - 1), len2); - return; - } - if (len2 == 1) { - dest -= len1; - cursor1 -= len1; - s.copyRange(a, cursor1 + 1, a, dest + 1, len1); - s.copyElement(tmp, cursor2, a, dest); - return; - } - - Comparator c = this.c; // Use local variable for performance - int minGallop = this.minGallop; // " " " " " - outer: - while (true) { - int count1 = 0; // Number of times in a row that first run won - int count2 = 0; // Number of times in a row that second run won - - /* - * Do the straightforward thing until (if ever) one run - * appears to win consistently. - */ - do { - assert len1 > 0 && len2 > 1; - if (c.compare(s.getKey(tmp, cursor2), s.getKey(a, cursor1)) < 0) { - s.copyElement(a, cursor1--, a, dest--); - count1++; - count2 = 0; - if (--len1 == 0) - break outer; - } else { - s.copyElement(tmp, cursor2--, a, dest--); - count2++; - count1 = 0; - if (--len2 == 1) - break outer; - } - } while ((count1 | count2) < minGallop); - - /* - * One run is winning so consistently that galloping may be a - * huge win. So try that, and continue galloping until (if ever) - * neither run appears to be winning consistently anymore. 
- */ - do { - assert len1 > 0 && len2 > 1; - count1 = len1 - gallopRight(s.getKey(tmp, cursor2), a, base1, len1, len1 - 1, c); - if (count1 != 0) { - dest -= count1; - cursor1 -= count1; - len1 -= count1; - s.copyRange(a, cursor1 + 1, a, dest + 1, count1); - if (len1 == 0) - break outer; - } - s.copyElement(tmp, cursor2--, a, dest--); - if (--len2 == 1) - break outer; - - count2 = len2 - gallopLeft(s.getKey(a, cursor1), tmp, 0, len2, len2 - 1, c); - if (count2 != 0) { - dest -= count2; - cursor2 -= count2; - len2 -= count2; - s.copyRange(tmp, cursor2 + 1, a, dest + 1, count2); - if (len2 <= 1) // len2 == 1 || len2 == 0 - break outer; - } - s.copyElement(a, cursor1--, a, dest--); - if (--len1 == 0) - break outer; - minGallop--; - } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP); - if (minGallop < 0) - minGallop = 0; - minGallop += 2; // Penalize for leaving gallop mode - } // End of "outer" loop - this.minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field - - if (len2 == 1) { - assert len1 > 0; - dest -= len1; - cursor1 -= len1; - s.copyRange(a, cursor1 + 1, a, dest + 1, len1); - s.copyElement(tmp, cursor2, a, dest); // Move first elt of run2 to front of merge - } else if (len2 == 0) { - throw new IllegalArgumentException( - "Comparison method violates its general contract!"); - } else { - assert len1 == 0; - assert len2 > 0; - s.copyRange(tmp, 0, a, dest - (len2 - 1), len2); - } - } - - /** - * Ensures that the external array tmp has at least the specified - * number of elements, increasing its size if necessary. The size - * increases exponentially to ensure amortized linear time complexity. - * - * @param minCapacity the minimum required capacity of the tmp array - * @return tmp, whether or not it grew - */ - private Buffer ensureCapacity(int minCapacity) { - if (tmpLength < minCapacity) { - // Compute smallest power of 2 > minCapacity - int newSize = minCapacity; - newSize |= newSize >> 1; - newSize |= newSize >> 2; - newSize |= newSize >> 4; - newSize |= newSize >> 8; - newSize |= newSize >> 16; - newSize++; - - if (newSize < 0) // Not bloody likely! - newSize = minCapacity; - else - newSize = Math.min(newSize, aLength >>> 1); - - tmp = s.allocate(newSize); - tmpLength = newSize; - } - return tmp; - } - } -} diff --git a/core/src/main/java/org/apache/spark/util/collection/TimSort.java b/core/src/main/java/org/apache/spark/util/collection/TimSort.java new file mode 100644 index 0000000000000..409e1a41c5d49 --- /dev/null +++ b/core/src/main/java/org/apache/spark/util/collection/TimSort.java @@ -0,0 +1,940 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
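The ensureCapacity logic above grows the temporary merge buffer to the smallest power of two strictly greater than the requested capacity via bit smearing; isolated, the computation looks like this:

// The bit-smearing step from ensureCapacity above: OR-ing in right-shifted
// copies fills every bit below the highest set bit, so adding one yields the
// smallest power of two strictly greater than the input (for positive ints
// small enough not to overflow; the real code falls back on overflow).
public final class NextPowerOfTwoDemo {
  static int nextPowerOfTwo(int n) {
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    return n + 1;
  }

  public static void main(String[] args) {
    System.out.println(nextPowerOfTwo(5));     // 8
    System.out.println(nextPowerOfTwo(8));     // 16
    System.out.println(nextPowerOfTwo(1000));  // 1024
  }
}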
+ */ + +package org.apache.spark.util.collection; + +import java.util.Comparator; + +/** + * A port of the Android TimSort class, which utilizes a "stable, adaptive, iterative mergesort." + * See the method comment on sort() for more details. + * + * This has been kept in Java with the original style in order to match very closely with the + * Android source code, and thus be easy to verify correctness. The class is package private. We put + * a simple Scala wrapper {@link org.apache.spark.util.collection.Sorter}, which is available to + * package org.apache.spark. + * + * The purpose of the port is to generalize the interface to the sort to accept input data formats + * besides simple arrays where every element is sorted individually. For instance, the AppendOnlyMap + * uses this to sort an Array with alternating elements of the form [key, value, key, value]. + * This generalization comes with minimal overhead -- see SortDataFormat for more information. + * + * We allow key reuse to prevent creating many key objects -- see SortDataFormat. + * + * @see org.apache.spark.util.collection.SortDataFormat + * @see org.apache.spark.util.collection.Sorter + */ +class TimSort { + + /** + * This is the minimum sized sequence that will be merged. Shorter + * sequences will be lengthened by calling binarySort. If the entire + * array is less than this length, no merges will be performed. + * + * This constant should be a power of two. It was 64 in Tim Peter's C + * implementation, but 32 was empirically determined to work better in + * this implementation. In the unlikely event that you set this constant + * to be a number that's not a power of two, you'll need to change the + * minRunLength computation. + * + * If you decrease this constant, you must change the stackLen + * computation in the TimSort constructor, or you risk an + * ArrayOutOfBounds exception. See listsort.txt for a discussion + * of the minimum stack length required as a function of the length + * of the array being sorted and the minimum merge sequence length. + */ + private static final int MIN_MERGE = 32; + + private final SortDataFormat s; + + public TimSort(SortDataFormat sortDataFormat) { + this.s = sortDataFormat; + } + + /** + * A stable, adaptive, iterative mergesort that requires far fewer than + * n lg(n) comparisons when running on partially sorted arrays, while + * offering performance comparable to a traditional mergesort when run + * on random arrays. Like all proper mergesorts, this sort is stable and + * runs O(n log n) time (worst case). In the worst case, this sort requires + * temporary storage space for n/2 object references; in the best case, + * it requires only a small constant amount of space. + * + * This implementation was adapted from Tim Peters's list sort for + * Python, which is described in detail here: + * + * http://svn.python.org/projects/python/trunk/Objects/listsort.txt + * + * Tim's C code may be found here: + * + * http://svn.python.org/projects/python/trunk/Objects/listobject.c + * + * The underlying techniques are described in this paper (and may have + * even earlier origins): + * + * "Optimistic Sorting and Information Theoretic Complexity" + * Peter McIlroy + * SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms), + * pp 467-474, Austin, Texas, 25-27 January 1993. 
+ * + * While the API to this class consists solely of static methods, it is + * (privately) instantiable; a TimSort instance holds the state of an ongoing + * sort, assuming the input array is large enough to warrant the full-blown + * TimSort. Small arrays are sorted in place, using a binary insertion sort. + * + * @author Josh Bloch + */ + public void sort(Buffer a, int lo, int hi, Comparator c) { + assert c != null; + + int nRemaining = hi - lo; + if (nRemaining < 2) + return; // Arrays of size 0 and 1 are always sorted + + // If array is small, do a "mini-TimSort" with no merges + if (nRemaining < MIN_MERGE) { + int initRunLen = countRunAndMakeAscending(a, lo, hi, c); + binarySort(a, lo, hi, lo + initRunLen, c); + return; + } + + /** + * March over the array once, left to right, finding natural runs, + * extending short natural runs to minRun elements, and merging runs + * to maintain stack invariant. + */ + SortState sortState = new SortState(a, c, hi - lo); + int minRun = minRunLength(nRemaining); + do { + // Identify next run + int runLen = countRunAndMakeAscending(a, lo, hi, c); + + // If run is short, extend to min(minRun, nRemaining) + if (runLen < minRun) { + int force = nRemaining <= minRun ? nRemaining : minRun; + binarySort(a, lo, lo + force, lo + runLen, c); + runLen = force; + } + + // Push run onto pending-run stack, and maybe merge + sortState.pushRun(lo, runLen); + sortState.mergeCollapse(); + + // Advance to find next run + lo += runLen; + nRemaining -= runLen; + } while (nRemaining != 0); + + // Merge all remaining runs to complete sort + assert lo == hi; + sortState.mergeForceCollapse(); + assert sortState.stackSize == 1; + } + + /** + * Sorts the specified portion of the specified array using a binary + * insertion sort. This is the best method for sorting small numbers + * of elements. It requires O(n log n) compares, but O(n^2) data + * movement (worst case). + * + * If the initial part of the specified range is already sorted, + * this method can take advantage of it: the method assumes that the + * elements from index {@code lo}, inclusive, to {@code start}, + * exclusive are already sorted. + * + * @param a the array in which a range is to be sorted + * @param lo the index of the first element in the range to be sorted + * @param hi the index after the last element in the range to be sorted + * @param start the index of the first element in the range that is + * not already known to be sorted ({@code lo <= start <= hi}) + * @param c comparator to used for the sort + */ + @SuppressWarnings("fallthrough") + private void binarySort(Buffer a, int lo, int hi, int start, Comparator c) { + assert lo <= start && start <= hi; + if (start == lo) + start++; + + K key0 = s.newKey(); + K key1 = s.newKey(); + + Buffer pivotStore = s.allocate(1); + for ( ; start < hi; start++) { + s.copyElement(a, start, pivotStore, 0); + K pivot = s.getKey(pivotStore, 0, key0); + + // Set left (and right) to the index where a[start] (pivot) belongs + int left = lo; + int right = start; + assert left <= right; + /* + * Invariants: + * pivot >= all in [lo, left). + * pivot < all in [right, start). + */ + while (left < right) { + int mid = (left + right) >>> 1; + if (c.compare(pivot, s.getKey(a, mid, key1)) < 0) + right = mid; + else + left = mid + 1; + } + assert left == right; + + /* + * The invariants still hold: pivot >= all in [lo, left) and + * pivot < all in [left, start), so pivot belongs at left. 
Note + * that if there are elements equal to pivot, left points to the + * first slot after them -- that's why this sort is stable. + * Slide elements over to make room for pivot. + */ + int n = start - left; // The number of elements to move + // Switch is just an optimization for arraycopy in default case + switch (n) { + case 2: s.copyElement(a, left + 1, a, left + 2); + case 1: s.copyElement(a, left, a, left + 1); + break; + default: s.copyRange(a, left, a, left + 1, n); + } + s.copyElement(pivotStore, 0, a, left); + } + } + + /** + * Returns the length of the run beginning at the specified position in + * the specified array and reverses the run if it is descending (ensuring + * that the run will always be ascending when the method returns). + * + * A run is the longest ascending sequence with: + * + * a[lo] <= a[lo + 1] <= a[lo + 2] <= ... + * + * or the longest descending sequence with: + * + * a[lo] > a[lo + 1] > a[lo + 2] > ... + * + * For its intended use in a stable mergesort, the strictness of the + * definition of "descending" is needed so that the call can safely + * reverse a descending sequence without violating stability. + * + * @param a the array in which a run is to be counted and possibly reversed + * @param lo index of the first element in the run + * @param hi index after the last element that may be contained in the run. + It is required that {@code lo < hi}. + * @param c the comparator to used for the sort + * @return the length of the run beginning at the specified position in + * the specified array + */ + private int countRunAndMakeAscending(Buffer a, int lo, int hi, Comparator c) { + assert lo < hi; + int runHi = lo + 1; + if (runHi == hi) + return 1; + + K key0 = s.newKey(); + K key1 = s.newKey(); + + // Find end of run, and reverse range if descending + if (c.compare(s.getKey(a, runHi++, key0), s.getKey(a, lo, key1)) < 0) { // Descending + while (runHi < hi && c.compare(s.getKey(a, runHi, key0), s.getKey(a, runHi - 1, key1)) < 0) + runHi++; + reverseRange(a, lo, runHi); + } else { // Ascending + while (runHi < hi && c.compare(s.getKey(a, runHi, key0), s.getKey(a, runHi - 1, key1)) >= 0) + runHi++; + } + + return runHi - lo; + } + + /** + * Reverse the specified range of the specified array. + * + * @param a the array in which a range is to be reversed + * @param lo the index of the first element in the range to be reversed + * @param hi the index after the last element in the range to be reversed + */ + private void reverseRange(Buffer a, int lo, int hi) { + hi--; + while (lo < hi) { + s.swap(a, lo, hi); + lo++; + hi--; + } + } + + /** + * Returns the minimum acceptable run length for an array of the specified + * length. Natural runs shorter than this will be extended with + * {@link #binarySort}. + * + * Roughly speaking, the computation is: + * + * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return MIN_MERGE/2. + * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k + * is close to, but strictly less than, an exact power of 2. + * + * For the rationale, see listsort.txt. + * + * @param n the length of the array to be sorted + * @return the length of the minimum run to be merged + */ + private int minRunLength(int n) { + assert n >= 0; + int r = 0; // Becomes 1 if any 1 bits are shifted off + while (n >= MIN_MERGE) { + r |= (n & 1); + n >>= 1; + } + return n + r; + } + + private class SortState { + + /** + * The Buffer being sorted. 
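countRunAndMakeAscending, shown above, counts the longest ascending (or strictly descending, then reversed) prefix; the strict ">" requirement for descending runs is what lets the reversal preserve stability. A plain-array illustration:

import java.util.Arrays;

// Plain-array version of the run detection described above: count the natural
// run starting at lo, and reverse it in place if it is strictly descending so
// the caller always sees an ascending run. Equal elements never occur inside a
// "descending" run, so reversing cannot reorder equal keys.
public final class CountRunDemo {
  static int countRunAndMakeAscending(int[] a, int lo, int hi) {
    int runHi = lo + 1;
    if (runHi == hi) return 1;

    if (a[runHi++] < a[lo]) {                              // strictly descending
      while (runHi < hi && a[runHi] < a[runHi - 1]) runHi++;
      for (int i = lo, j = runHi - 1; i < j; i++, j--) {   // reverse the run
        int tmp = a[i]; a[i] = a[j]; a[j] = tmp;
      }
    } else {                                               // ascending (non-strict)
      while (runHi < hi && a[runHi] >= a[runHi - 1]) runHi++;
    }
    return runHi - lo;
  }

  public static void main(String[] args) {
    int[] a = {9, 7, 4, 4, 8, 1};
    int len = countRunAndMakeAscending(a, 0, a.length);
    System.out.println(len + " " + Arrays.toString(a));    // 3 [4, 7, 9, 4, 8, 1]
  }
}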
+ */ + private final Buffer a; + + /** + * Length of the sort Buffer. + */ + private final int aLength; + + /** + * The comparator for this sort. + */ + private final Comparator c; + + /** + * When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. + */ + private static final int MIN_GALLOP = 7; + + /** + * This controls when we get *into* galloping mode. It is initialized + * to MIN_GALLOP. The mergeLo and mergeHi methods nudge it higher for + * random data, and lower for highly structured data. + */ + private int minGallop = MIN_GALLOP; + + /** + * Maximum initial size of tmp array, which is used for merging. The array + * can grow to accommodate demand. + * + * Unlike Tim's original C version, we do not allocate this much storage + * when sorting smaller arrays. This change was required for performance. + */ + private static final int INITIAL_TMP_STORAGE_LENGTH = 256; + + /** + * Temp storage for merges. + */ + private Buffer tmp; // Actual runtime type will be Object[], regardless of T + + /** + * Length of the temp storage. + */ + private int tmpLength = 0; + + /** + * A stack of pending runs yet to be merged. Run i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that: + * + * runBase[i] + runLen[i] == runBase[i + 1] + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + private int stackSize = 0; // Number of pending runs on stack + private final int[] runBase; + private final int[] runLen; + + /** + * Creates a TimSort instance to maintain the state of an ongoing sort. + * + * @param a the array to be sorted + * @param c the comparator to determine the order of the sort + */ + private SortState(Buffer a, Comparator c, int len) { + this.aLength = len; + this.a = a; + this.c = c; + + // Allocate temp storage (which may be increased later if necessary) + tmpLength = len < 2 * INITIAL_TMP_STORAGE_LENGTH ? len >>> 1 : INITIAL_TMP_STORAGE_LENGTH; + tmp = s.allocate(tmpLength); + + /* + * Allocate runs-to-be-merged stack (which cannot be expanded). The + * stack length requirements are described in listsort.txt. The C + * version always uses the same stack length (85), but this was + * measured to be too expensive when sorting "mid-sized" arrays (e.g., + * 100 elements) in Java. Therefore, we use smaller (but sufficiently + * large) stack lengths for smaller arrays. The "magic numbers" in the + * computation below must be changed if MIN_MERGE is decreased. See + * the MIN_MERGE declaration above for more information. + */ + int stackLen = (len < 120 ? 5 : + len < 1542 ? 10 : + len < 119151 ? 19 : 40); + runBase = new int[stackLen]; + runLen = new int[stackLen]; + } + + /** + * Pushes the specified run onto the pending-run stack. + * + * @param runBase index of the first element in the run + * @param runLen the number of elements in the run + */ + private void pushRun(int runBase, int runLen) { + this.runBase[stackSize] = runBase; + this.runLen[stackSize] = runLen; + stackSize++; + } + + /** + * Examines the stack of runs waiting to be merged and merges adjacent runs + * until the stack invariants are reestablished: + * + * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] + * 2. runLen[i - 2] > runLen[i - 1] + * + * This method is called each time a new run is pushed onto the stack, + * so the invariants are guaranteed to hold for i < stackSize upon + * entry to the method. 
+ */ + private void mergeCollapse() { + while (stackSize > 1) { + int n = stackSize - 2; + if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) { + if (runLen[n - 1] < runLen[n + 1]) + n--; + mergeAt(n); + } else if (runLen[n] <= runLen[n + 1]) { + mergeAt(n); + } else { + break; // Invariant is established + } + } + } + + /** + * Merges all runs on the stack until only one remains. This method is + * called once, to complete the sort. + */ + private void mergeForceCollapse() { + while (stackSize > 1) { + int n = stackSize - 2; + if (n > 0 && runLen[n - 1] < runLen[n + 1]) + n--; + mergeAt(n); + } + } + + /** + * Merges the two runs at stack indices i and i+1. Run i must be + * the penultimate or antepenultimate run on the stack. In other words, + * i must be equal to stackSize-2 or stackSize-3. + * + * @param i stack index of the first of the two runs to merge + */ + private void mergeAt(int i) { + assert stackSize >= 2; + assert i >= 0; + assert i == stackSize - 2 || i == stackSize - 3; + + int base1 = runBase[i]; + int len1 = runLen[i]; + int base2 = runBase[i + 1]; + int len2 = runLen[i + 1]; + assert len1 > 0 && len2 > 0; + assert base1 + len1 == base2; + + /* + * Record the length of the combined runs; if i is the 3rd-last + * run now, also slide over the last run (which isn't involved + * in this merge). The current run (i+1) goes away in any case. + */ + runLen[i] = len1 + len2; + if (i == stackSize - 3) { + runBase[i + 1] = runBase[i + 2]; + runLen[i + 1] = runLen[i + 2]; + } + stackSize--; + + K key0 = s.newKey(); + + /* + * Find where the first element of run2 goes in run1. Prior elements + * in run1 can be ignored (because they're already in place). + */ + int k = gallopRight(s.getKey(a, base2, key0), a, base1, len1, 0, c); + assert k >= 0; + base1 += k; + len1 -= k; + if (len1 == 0) + return; + + /* + * Find where the last element of run1 goes in run2. Subsequent elements + * in run2 can be ignored (because they're already in place). + */ + len2 = gallopLeft(s.getKey(a, base1 + len1 - 1, key0), a, base2, len2, len2 - 1, c); + assert len2 >= 0; + if (len2 == 0) + return; + + // Merge remaining runs, using tmp array with min(len1, len2) elements + if (len1 <= len2) + mergeLo(base1, len1, base2, len2); + else + mergeHi(base1, len1, base2, len2); + } + + /** + * Locates the position at which to insert the specified key into the + * specified sorted range; if the range contains an element equal to key, + * returns the index of the leftmost equal element. + * + * @param key the key whose insertion point to search for + * @param a the array in which to search + * @param base the index of the first element in the range + * @param len the length of the range; must be > 0 + * @param hint the index at which to begin the search, 0 <= hint < n. + * The closer hint is to the result, the faster this method will run. + * @param c the comparator used to order the range, and to search + * @return the int k, 0 <= k <= n such that a[b + k - 1] < key <= a[b + k], + * pretending that a[b - 1] is minus infinity and a[b + n] is infinity. + * In other words, key belongs at index b + k; or in other words, + * the first k elements of a should precede key, and the last n - k + * should follow it. 
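As a concrete illustration of this contract, a minimal sketch in Scala (not part of the patch; the name is invented, the hint parameter is dropped, and integer overflow is ignored) of the same gallop-then-binary-search idea over a plain sorted Array[Int]:

// Illustrative only: leftmost insertion point of key in the sorted array a,
// found by exponential galloping probes followed by a binary search.
def gallopLeftmost(key: Int, a: Array[Int]): Int = {
  val n = a.length
  if (n == 0 || key <= a(0)) return 0
  // Invariant: a(lastOfs) < key. Grow the probe offset as 1, 3, 7, 15, ...
  var lastOfs = 0
  var ofs = 1
  while (ofs < n && a(ofs) < key) {
    lastOfs = ofs
    ofs = (ofs << 1) + 1
  }
  if (ofs > n) ofs = n
  // Now a(lastOfs) < key <= a(ofs), treating a(n) as +infinity; binary search in between.
  var lo = lastOfs + 1
  var hi = ofs
  while (lo < hi) {
    val mid = lo + ((hi - lo) >>> 1)
    if (a(mid) < key) lo = mid + 1 else hi = mid
  }
  lo  // a(lo - 1) < key <= a(lo)
}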
+ */ + private int gallopLeft(K key, Buffer a, int base, int len, int hint, Comparator c) { + assert len > 0 && hint >= 0 && hint < len; + int lastOfs = 0; + int ofs = 1; + K key0 = s.newKey(); + + if (c.compare(key, s.getKey(a, base + hint, key0)) > 0) { + // Gallop right until a[base+hint+lastOfs] < key <= a[base+hint+ofs] + int maxOfs = len - hint; + while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint + ofs, key0)) > 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) // int overflow + ofs = maxOfs; + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + lastOfs += hint; + ofs += hint; + } else { // key <= a[base + hint] + // Gallop left until a[base+hint-ofs] < key <= a[base+hint-lastOfs] + final int maxOfs = hint + 1; + while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint - ofs, key0)) <= 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) // int overflow + ofs = maxOfs; + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to base + int tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } + assert -1 <= lastOfs && lastOfs < ofs && ofs <= len; + + /* + * Now a[base+lastOfs] < key <= a[base+ofs], so key belongs somewhere + * to the right of lastOfs but no farther right than ofs. Do a binary + * search, with invariant a[base + lastOfs - 1] < key <= a[base + ofs]. + */ + lastOfs++; + while (lastOfs < ofs) { + int m = lastOfs + ((ofs - lastOfs) >>> 1); + + if (c.compare(key, s.getKey(a, base + m, key0)) > 0) + lastOfs = m + 1; // a[base + m] < key + else + ofs = m; // key <= a[base + m] + } + assert lastOfs == ofs; // so a[base + ofs - 1] < key <= a[base + ofs] + return ofs; + } + + /** + * Like gallopLeft, except that if the range contains an element equal to + * key, gallopRight returns the index after the rightmost equal element. + * + * @param key the key whose insertion point to search for + * @param a the array in which to search + * @param base the index of the first element in the range + * @param len the length of the range; must be > 0 + * @param hint the index at which to begin the search, 0 <= hint < n. + * The closer hint is to the result, the faster this method will run. 
+ * @param c the comparator used to order the range, and to search + * @return the int k, 0 <= k <= n such that a[b + k - 1] <= key < a[b + k] + */ + private int gallopRight(K key, Buffer a, int base, int len, int hint, Comparator c) { + assert len > 0 && hint >= 0 && hint < len; + + int ofs = 1; + int lastOfs = 0; + K key1 = s.newKey(); + + if (c.compare(key, s.getKey(a, base + hint, key1)) < 0) { + // Gallop left until a[b+hint - ofs] <= key < a[b+hint - lastOfs] + int maxOfs = hint + 1; + while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint - ofs, key1)) < 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) // int overflow + ofs = maxOfs; + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to b + int tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } else { // a[b + hint] <= key + // Gallop right until a[b+hint + lastOfs] <= key < a[b+hint + ofs] + int maxOfs = len - hint; + while (ofs < maxOfs && c.compare(key, s.getKey(a, base + hint + ofs, key1)) >= 0) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) // int overflow + ofs = maxOfs; + } + if (ofs > maxOfs) + ofs = maxOfs; + + // Make offsets relative to b + lastOfs += hint; + ofs += hint; + } + assert -1 <= lastOfs && lastOfs < ofs && ofs <= len; + + /* + * Now a[b + lastOfs] <= key < a[b + ofs], so key belongs somewhere to + * the right of lastOfs but no farther right than ofs. Do a binary + * search, with invariant a[b + lastOfs - 1] <= key < a[b + ofs]. + */ + lastOfs++; + while (lastOfs < ofs) { + int m = lastOfs + ((ofs - lastOfs) >>> 1); + + if (c.compare(key, s.getKey(a, base + m, key1)) < 0) + ofs = m; // key < a[b + m] + else + lastOfs = m + 1; // a[b + m] <= key + } + assert lastOfs == ofs; // so a[b + ofs - 1] <= key < a[b + ofs] + return ofs; + } + + /** + * Merges two adjacent runs in place, in a stable fashion. The first + * element of the first run must be greater than the first element of the + * second run (a[base1] > a[base2]), and the last element of the first run + * (a[base1 + len1-1]) must be greater than all elements of the second run. + * + * For performance, this method should be called only when len1 <= len2; + * its twin, mergeHi should be called if len1 >= len2. (Either method + * may be called if len1 == len2.) 
+ * + * @param base1 index of first element in first run to be merged + * @param len1 length of first run to be merged (must be > 0) + * @param base2 index of first element in second run to be merged + * (must be aBase + aLen) + * @param len2 length of second run to be merged (must be > 0) + */ + private void mergeLo(int base1, int len1, int base2, int len2) { + assert len1 > 0 && len2 > 0 && base1 + len1 == base2; + + // Copy first run into temp array + Buffer a = this.a; // For performance + Buffer tmp = ensureCapacity(len1); + s.copyRange(a, base1, tmp, 0, len1); + + int cursor1 = 0; // Indexes into tmp array + int cursor2 = base2; // Indexes int a + int dest = base1; // Indexes int a + + // Move first element of second run and deal with degenerate cases + s.copyElement(a, cursor2++, a, dest++); + if (--len2 == 0) { + s.copyRange(tmp, cursor1, a, dest, len1); + return; + } + if (len1 == 1) { + s.copyRange(a, cursor2, a, dest, len2); + s.copyElement(tmp, cursor1, a, dest + len2); // Last elt of run 1 to end of merge + return; + } + + K key0 = s.newKey(); + K key1 = s.newKey(); + + Comparator c = this.c; // Use local variable for performance + int minGallop = this.minGallop; // " " " " " + outer: + while (true) { + int count1 = 0; // Number of times in a row that first run won + int count2 = 0; // Number of times in a row that second run won + + /* + * Do the straightforward thing until (if ever) one run starts + * winning consistently. + */ + do { + assert len1 > 1 && len2 > 0; + if (c.compare(s.getKey(a, cursor2, key0), s.getKey(tmp, cursor1, key1)) < 0) { + s.copyElement(a, cursor2++, a, dest++); + count2++; + count1 = 0; + if (--len2 == 0) + break outer; + } else { + s.copyElement(tmp, cursor1++, a, dest++); + count1++; + count2 = 0; + if (--len1 == 1) + break outer; + } + } while ((count1 | count2) < minGallop); + + /* + * One run is winning so consistently that galloping may be a + * huge win. So try that, and continue galloping until (if ever) + * neither run appears to be winning consistently anymore. + */ + do { + assert len1 > 1 && len2 > 0; + count1 = gallopRight(s.getKey(a, cursor2, key0), tmp, cursor1, len1, 0, c); + if (count1 != 0) { + s.copyRange(tmp, cursor1, a, dest, count1); + dest += count1; + cursor1 += count1; + len1 -= count1; + if (len1 <= 1) // len1 == 1 || len1 == 0 + break outer; + } + s.copyElement(a, cursor2++, a, dest++); + if (--len2 == 0) + break outer; + + count2 = gallopLeft(s.getKey(tmp, cursor1, key0), a, cursor2, len2, 0, c); + if (count2 != 0) { + s.copyRange(a, cursor2, a, dest, count2); + dest += count2; + cursor2 += count2; + len2 -= count2; + if (len2 == 0) + break outer; + } + s.copyElement(tmp, cursor1++, a, dest++); + if (--len1 == 1) + break outer; + minGallop--; + } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP); + if (minGallop < 0) + minGallop = 0; + minGallop += 2; // Penalize for leaving gallop mode + } // End of "outer" loop + this.minGallop = minGallop < 1 ? 1 : minGallop; // Write back to field + + if (len1 == 1) { + assert len2 > 0; + s.copyRange(a, cursor2, a, dest, len2); + s.copyElement(tmp, cursor1, a, dest + len2); // Last elt of run 1 to end of merge + } else if (len1 == 0) { + throw new IllegalArgumentException( + "Comparison method violates its general contract!"); + } else { + assert len2 == 0; + assert len1 > 1; + s.copyRange(tmp, cursor1, a, dest, len1); + } + } + + /** + * Like mergeLo, except that this method should be called only if + * len1 >= len2; mergeLo should be called if len1 <= len2. 
(Either method + * may be called if len1 == len2.) + * + * @param base1 index of first element in first run to be merged + * @param len1 length of first run to be merged (must be > 0) + * @param base2 index of first element in second run to be merged + * (must be aBase + aLen) + * @param len2 length of second run to be merged (must be > 0) + */ + private void mergeHi(int base1, int len1, int base2, int len2) { + assert len1 > 0 && len2 > 0 && base1 + len1 == base2; + + // Copy second run into temp array + Buffer a = this.a; // For performance + Buffer tmp = ensureCapacity(len2); + s.copyRange(a, base2, tmp, 0, len2); + + int cursor1 = base1 + len1 - 1; // Indexes into a + int cursor2 = len2 - 1; // Indexes into tmp array + int dest = base2 + len2 - 1; // Indexes into a + + K key0 = s.newKey(); + K key1 = s.newKey(); + + // Move last element of first run and deal with degenerate cases + s.copyElement(a, cursor1--, a, dest--); + if (--len1 == 0) { + s.copyRange(tmp, 0, a, dest - (len2 - 1), len2); + return; + } + if (len2 == 1) { + dest -= len1; + cursor1 -= len1; + s.copyRange(a, cursor1 + 1, a, dest + 1, len1); + s.copyElement(tmp, cursor2, a, dest); + return; + } + + Comparator c = this.c; // Use local variable for performance + int minGallop = this.minGallop; // " " " " " + outer: + while (true) { + int count1 = 0; // Number of times in a row that first run won + int count2 = 0; // Number of times in a row that second run won + + /* + * Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + do { + assert len1 > 0 && len2 > 1; + if (c.compare(s.getKey(tmp, cursor2, key0), s.getKey(a, cursor1, key1)) < 0) { + s.copyElement(a, cursor1--, a, dest--); + count1++; + count2 = 0; + if (--len1 == 0) + break outer; + } else { + s.copyElement(tmp, cursor2--, a, dest--); + count2++; + count1 = 0; + if (--len2 == 1) + break outer; + } + } while ((count1 | count2) < minGallop); + + /* + * One run is winning so consistently that galloping may be a + * huge win. So try that, and continue galloping until (if ever) + * neither run appears to be winning consistently anymore. + */ + do { + assert len1 > 0 && len2 > 1; + count1 = len1 - gallopRight(s.getKey(tmp, cursor2, key0), a, base1, len1, len1 - 1, c); + if (count1 != 0) { + dest -= count1; + cursor1 -= count1; + len1 -= count1; + s.copyRange(a, cursor1 + 1, a, dest + 1, count1); + if (len1 == 0) + break outer; + } + s.copyElement(tmp, cursor2--, a, dest--); + if (--len2 == 1) + break outer; + + count2 = len2 - gallopLeft(s.getKey(a, cursor1, key0), tmp, 0, len2, len2 - 1, c); + if (count2 != 0) { + dest -= count2; + cursor2 -= count2; + len2 -= count2; + s.copyRange(tmp, cursor2 + 1, a, dest + 1, count2); + if (len2 <= 1) // len2 == 1 || len2 == 0 + break outer; + } + s.copyElement(a, cursor1--, a, dest--); + if (--len1 == 0) + break outer; + minGallop--; + } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP); + if (minGallop < 0) + minGallop = 0; + minGallop += 2; // Penalize for leaving gallop mode + } // End of "outer" loop + this.minGallop = minGallop < 1 ? 
1 : minGallop; // Write back to field + + if (len2 == 1) { + assert len1 > 0; + dest -= len1; + cursor1 -= len1; + s.copyRange(a, cursor1 + 1, a, dest + 1, len1); + s.copyElement(tmp, cursor2, a, dest); // Move first elt of run2 to front of merge + } else if (len2 == 0) { + throw new IllegalArgumentException( + "Comparison method violates its general contract!"); + } else { + assert len1 == 0; + assert len2 > 0; + s.copyRange(tmp, 0, a, dest - (len2 - 1), len2); + } + } + + /** + * Ensures that the external array tmp has at least the specified + * number of elements, increasing its size if necessary. The size + * increases exponentially to ensure amortized linear time complexity. + * + * @param minCapacity the minimum required capacity of the tmp array + * @return tmp, whether or not it grew + */ + private Buffer ensureCapacity(int minCapacity) { + if (tmpLength < minCapacity) { + // Compute smallest power of 2 > minCapacity + int newSize = minCapacity; + newSize |= newSize >> 1; + newSize |= newSize >> 2; + newSize |= newSize >> 4; + newSize |= newSize >> 8; + newSize |= newSize >> 16; + newSize++; + + if (newSize < 0) // Not bloody likely! + newSize = minCapacity; + else + newSize = Math.min(newSize, aLength >>> 1); + + tmp = s.allocate(newSize); + tmpLength = newSize; + } + return tmp; + } + } +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/additional-metrics.js b/core/src/main/resources/org/apache/spark/ui/static/additional-metrics.js new file mode 100644 index 0000000000000..14ba37d7c9bd9 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/additional-metrics.js @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Register functions to show/hide columns based on checkboxes. These need + * to be registered after the page loads. */ +$(function() { + $("span.expand-additional-metrics").click(function(){ + // Expand the list of additional metrics. + var additionalMetricsDiv = $(this).parent().find('.additional-metrics'); + $(additionalMetricsDiv).toggleClass('collapsed'); + + // Switch the class of the arrow from open to closed. + $(this).find('.expand-additional-metrics-arrow').toggleClass('arrow-open'); + $(this).find('.expand-additional-metrics-arrow').toggleClass('arrow-closed'); + }); + + stripeSummaryTable(); + + $("input:checkbox").click(function() { + var column = "table ." + $(this).attr("name"); + $(column).toggle(); + stripeSummaryTable(); + }); + + $("#select-all-metrics").click(function() { + if (this.checked) { + // Toggle all un-checked options. + $('input:checkbox:not(:checked)').trigger('click'); + } else { + // Toggle all checked options. 
+ $('input:checkbox:checked').trigger('click'); + } + }); + + // Trigger a click on the checkbox if a user clicks the label next to it. + $("span.additional-metric-title").click(function() { + $(this).parent().find('input:checkbox').trigger('click'); + }); +}); diff --git a/core/src/main/resources/org/apache/spark/ui/static/table.js b/core/src/main/resources/org/apache/spark/ui/static/table.js new file mode 100644 index 0000000000000..656147e40d13e --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/table.js @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Adds background colors to stripe table rows in the summary table (on the stage page). This is + * necessary (instead of using css or the table striping provided by bootstrap) because the summary + * table has hidden rows. + * + * An ID selector (rather than a class selector) is used to ensure this runs quickly even on pages + * with thousands of task rows (ID selectors are much faster than class selectors). 
*/ +function stripeSummaryTable() { + $("#task-summary-table").find("tr:not(:hidden)").each(function (index) { + if (index % 2 == 1) { + $(this).css("background-color", "#f9f9f9"); + } else { + $(this).css("background-color", "#ffffff"); + } + }); +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index 152bde5f6994f..f23ba9dba167f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -19,6 +19,7 @@ height: 50px; font-size: 15px; margin-bottom: 15px; + min-width: 1200px } .navbar .navbar-inner { @@ -39,12 +40,12 @@ .navbar .nav > li a { height: 30px; - line-height: 30px; + line-height: 2; } .navbar-text { height: 50px; - line-height: 50px; + line-height: 3.3; } table.sortable thead { @@ -120,7 +121,76 @@ pre { border: none; } +.description-input { + overflow: hidden; + text-overflow: ellipsis; + width: 100%; + white-space: nowrap; + display: block; +} + +.stacktrace-details { + max-height: 300px; + overflow-y: auto; + margin: 0; + transition: max-height 0.5s ease-out, padding 0.5s ease-out; +} + +.stacktrace-details.collapsed { + max-height: 0; + padding-top: 0; + padding-bottom: 0; + border: none; +} + +span.expand-additional-metrics { + cursor: pointer; +} + +span.additional-metric-title { + cursor: pointer; +} + +.additional-metrics.collapsed { + display: none; +} + .tooltip { font-weight: normal; } +.arrow-open { + width: 0; + height: 0; + border-left: 5px solid transparent; + border-right: 5px solid transparent; + border-top: 5px solid black; + float: left; + margin-top: 6px; +} + +.arrow-closed { + width: 0; + height: 0; + border-top: 5px solid transparent; + border-bottom: 5px solid transparent; + border-left: 5px solid black; + display: inline-block; +} + +.version { + line-height: 2.5; + vertical-align: bottom; + font-size: 12px; + padding: 0; + margin: 0; + font-weight: bold; + color: #777; +} + +/* Hide all additional metrics by default. This is done here rather than using JavaScript to + * avoid slow page loads for stage pages with large numbers (e.g., thousands) of tasks. 
*/ +.scheduler_delay, .deserialization_time, .fetch_wait_time, .serialization_time, +.getting_result_time { + display: none; +} diff --git a/core/src/main/scala/org/apache/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala index 12f2fe031cb1d..5f31bfba3f8d6 100644 --- a/core/src/main/scala/org/apache/spark/Accumulators.scala +++ b/core/src/main/scala/org/apache/spark/Accumulators.scala @@ -18,12 +18,15 @@ package org.apache.spark import java.io.{ObjectInputStream, Serializable} +import java.util.concurrent.atomic.AtomicLong +import java.lang.ThreadLocal import scala.collection.generic.Growable import scala.collection.mutable.Map import scala.reflect.ClassTag import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.util.Utils /** * A data type that can be accumulated, ie has an commutative and associative "add" operation, @@ -126,7 +129,7 @@ class Accumulable[R, T] ( } // Called by Java when deserializing an object - private def readObject(in: ObjectInputStream) { + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() value_ = zero deserialized = true @@ -227,6 +230,7 @@ GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Serializa */ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T], name: Option[String]) extends Accumulable[T,T](initialValue, param, name) { + def this(initialValue: T, param: AccumulatorParam[T]) = this(initialValue, param, None) } @@ -243,15 +247,47 @@ trait AccumulatorParam[T] extends AccumulableParam[T, T] { } } +object AccumulatorParam { + + // The following implicit objects were in SparkContext before 1.2 and users had to + // `import SparkContext._` to enable them. Now we move them here to make the compiler find + // them automatically. However, as there are duplicate codes in SparkContext for backward + // compatibility, please update them accordingly if you modify the following implicit objects. + + implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] { + def addInPlace(t1: Double, t2: Double): Double = t1 + t2 + def zero(initialValue: Double) = 0.0 + } + + implicit object IntAccumulatorParam extends AccumulatorParam[Int] { + def addInPlace(t1: Int, t2: Int): Int = t1 + t2 + def zero(initialValue: Int) = 0 + } + + implicit object LongAccumulatorParam extends AccumulatorParam[Long] { + def addInPlace(t1: Long, t2: Long) = t1 + t2 + def zero(initialValue: Long) = 0L + } + + implicit object FloatAccumulatorParam extends AccumulatorParam[Float] { + def addInPlace(t1: Float, t2: Float) = t1 + t2 + def zero(initialValue: Float) = 0f + } + + // TODO: Add AccumulatorParams for other types, e.g. lists and strings +} + // TODO: The multi-thread support in accumulators is kind of lame; check // if there's a more intuitive way of doing it right -private object Accumulators { +private[spark] object Accumulators { // TODO: Use soft references? 
=> need to make readObject work properly then val originals = Map[Long, Accumulable[_, _]]() - val localAccums = Map[Thread, Map[Long, Accumulable[_, _]]]() + val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() { + override protected def initialValue() = Map[Long, Accumulable[_, _]]() + } var lastId: Long = 0 - def newId: Long = synchronized { + def newId(): Long = synchronized { lastId += 1 lastId } @@ -260,22 +296,21 @@ private object Accumulators { if (original) { originals(a.id) = a } else { - val accums = localAccums.getOrElseUpdate(Thread.currentThread, Map()) - accums(a.id) = a + localAccums.get()(a.id) = a } } // Clear the local (non-original) accumulators for the current thread def clear() { synchronized { - localAccums.remove(Thread.currentThread) + localAccums.get.clear } } // Get the values of the local accumulators for the current thread (by ID) def values: Map[Long, Any] = synchronized { val ret = Map[Long, Any]() - for ((id, accum) <- localAccums.getOrElse(Thread.currentThread, Map())) { + for ((id, accum) <- localAccums.get) { ret(id) = accum.localValue } return ret diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala index 79c9c451d273d..3b684bbeceaf2 100644 --- a/core/src/main/scala/org/apache/spark/Aggregator.scala +++ b/core/src/main/scala/org/apache/spark/Aggregator.scala @@ -34,7 +34,9 @@ case class Aggregator[K, V, C] ( mergeValue: (C, V) => C, mergeCombiners: (C, C) => C) { - private val externalSorting = SparkEnv.get.conf.getBoolean("spark.shuffle.spill", true) + // When spilling is enabled sorting will happen externally, but not necessarily with an + // ExternalSorter. + private val isSpillEnabled = SparkEnv.get.conf.getBoolean("spark.shuffle.spill", true) @deprecated("use combineValuesByKey with TaskContext argument", "0.9.0") def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]): Iterator[(K, C)] = @@ -42,7 +44,7 @@ case class Aggregator[K, V, C] ( def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]], context: TaskContext): Iterator[(K, C)] = { - if (!externalSorting) { + if (!isSpillEnabled) { val combiners = new AppendOnlyMap[K,C] var kv: Product2[K, V] = null val update = (hadValue: Boolean, oldValue: C) => { @@ -59,8 +61,8 @@ case class Aggregator[K, V, C] ( // Update task metrics if context is not null // TODO: Make context non optional in a future release Option(context).foreach { c => - c.taskMetrics.memoryBytesSpilled += combiners.memoryBytesSpilled - c.taskMetrics.diskBytesSpilled += combiners.diskBytesSpilled + c.taskMetrics.incMemoryBytesSpilled(combiners.memoryBytesSpilled) + c.taskMetrics.incDiskBytesSpilled(combiners.diskBytesSpilled) } combiners.iterator } @@ -71,9 +73,9 @@ case class Aggregator[K, V, C] ( combineCombinersByKey(iter, null) def combineCombinersByKey(iter: Iterator[_ <: Product2[K, C]], context: TaskContext) - : Iterator[(K, C)] = + : Iterator[(K, C)] = { - if (!externalSorting) { + if (!isSpillEnabled) { val combiners = new AppendOnlyMap[K,C] var kc: Product2[K, C] = null val update = (hadValue: Boolean, oldValue: C) => { @@ -93,8 +95,8 @@ case class Aggregator[K, V, C] ( // Update task metrics if context is not null // TODO: Make context non-optional in a future release Option(context).foreach { c => - c.taskMetrics.memoryBytesSpilled += combiners.memoryBytesSpilled - c.taskMetrics.diskBytesSpilled += combiners.diskBytesSpilled + c.taskMetrics.incMemoryBytesSpilled(combiners.memoryBytesSpilled) + 
c.taskMetrics.incDiskBytesSpilled(combiners.diskBytesSpilled) } combiners.iterator } diff --git a/core/src/main/scala/org/apache/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala index 80da62c44edc5..a0c0372b7f0ef 100644 --- a/core/src/main/scala/org/apache/spark/CacheManager.scala +++ b/core/src/main/scala/org/apache/spark/CacheManager.scala @@ -44,7 +44,11 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { blockManager.get(key) match { case Some(blockResult) => // Partition is already materialized, so just return its values - context.taskMetrics.inputMetrics = Some(blockResult.inputMetrics) + val inputMetrics = blockResult.inputMetrics + val existingMetrics = context.taskMetrics + .getInputMetricsForReadMethod(inputMetrics.readMethod) + existingMetrics.addBytesRead(inputMetrics.bytesRead) + new InterruptibleIterator(context, blockResult.data.asInstanceOf[Iterator[T]]) case None => diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index ab2594cfc02eb..9a7cd4523e5ab 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -60,6 +60,9 @@ abstract class NarrowDependency[T](_rdd: RDD[T]) extends Dependency[T] { * @param serializer [[org.apache.spark.serializer.Serializer Serializer]] to use. If set to None, * the default serializer, as specified by `spark.serializer` config option, will * be used. + * @param keyOrdering key ordering for RDD's shuffles + * @param aggregator map/reduce-side aggregator for RDD's shuffle + * @param mapSideCombine whether to perform partial aggregation (also known as map-side combine) */ @DeveloperApi class ShuffleDependency[K, V, C]( diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala new file mode 100644 index 0000000000000..a46a81eabd965 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +/** + * A client that communicates with the cluster manager to request or kill executors. + */ +private[spark] trait ExecutorAllocationClient { + + /** + * Request an additional number of executors from the cluster manager. + * Return whether the request is acknowledged by the cluster manager. + */ + def requestExecutors(numAdditionalExecutors: Int): Boolean + + /** + * Request that the cluster manager kill the specified executors. + * Return whether the request is acknowledged by the cluster manager. 
+ */
+ def killExecutors(executorIds: Seq[String]): Boolean
+
+ /**
+ * Request that the cluster manager kill the specified executor.
+ * Return whether the request is acknowledged by the cluster manager.
+ */
+ def killExecutor(executorId: String): Boolean = killExecutors(Seq(executorId))
+}
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
new file mode 100644
index 0000000000000..5d5288bb6e60d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
@@ -0,0 +1,557 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+import scala.collection.mutable
+
+import org.apache.spark.scheduler._
+
+/**
+ * An agent that dynamically allocates and removes executors based on the workload.
+ *
+ * The add policy depends on whether there are backlogged tasks waiting to be scheduled. If
+ * the scheduler queue is not drained in M seconds, then new executors are added. If the queue
+ * persists for another N seconds, then more executors are added and so on. The number added
+ * in each round increases exponentially from the previous round until an upper bound on the
+ * number of executors has been reached. The upper bound is based both on a configured property
+ * and on the number of tasks pending: the policy will never increase the number of executor
+ * requests past the number needed to handle all pending tasks.
+ *
+ * The rationale for the exponential increase is twofold: (1) Executors should be added slowly
+ * in the beginning in case the number of extra executors needed turns out to be small. Otherwise,
+ * we may add more executors than we need just to remove them later. (2) Executors should be added
+ * quickly over time in case the maximum number of executors is very high. Otherwise, it will take
+ * a long time to ramp up under heavy workloads.
+ *
+ * The remove policy is simpler: If an executor has been idle for K seconds, meaning it has not
+ * been scheduled to run any tasks, then it is removed.
+ *
+ * There is no retry logic in either case because we make the assumption that the cluster manager
+ * will eventually fulfill all requests it receives asynchronously.
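To make the ramp-up concrete, a minimal sketch in Scala (not part of the patch; all names are invented, and registered and still-pending executors are lumped together) of one round of the exponential add policy:

// Illustrative only: returns (executors to request this round, request size for the next round).
def oneRound(existing: Int, pending: Int, toAdd: Int,
             pendingTasks: Int, tasksPerExecutor: Int, maxExecutors: Int): (Int, Int) = {
  // Executors needed to run every pending task, rounded up.
  val needed = (pendingTasks + tasksPerExecutor - 1) / tasksPerExecutor
  // Never request past what the pending tasks need, nor past the configured maximum.
  val headroom = math.min(needed, maxExecutors) - (existing + pending)
  val requested = math.max(0, math.min(toAdd, headroom))
  // Keep doubling only while the request is not clipped; otherwise reset to 1.
  val nextToAdd = if (requested == toAdd) toAdd * 2 else 1
  (requested, nextToAdd)
}

Starting from 2 executors with 100 pending tasks, one task per executor and a maximum of 20, successive rounds request 1, 2, 4 and 8 executors, and a final round requests the remaining 3 once the cap of 20 is reached.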
+ * + * The relevant Spark properties include the following: + * + * spark.dynamicAllocation.enabled - Whether this feature is enabled + * spark.dynamicAllocation.minExecutors - Lower bound on the number of executors + * spark.dynamicAllocation.maxExecutors - Upper bound on the number of executors + * spark.dynamicAllocation.initialExecutors - Number of executors to start with + * + * spark.dynamicAllocation.schedulerBacklogTimeout (M) - + * If there are backlogged tasks for this duration, add new executors + * + * spark.dynamicAllocation.sustainedSchedulerBacklogTimeout (N) - + * If the backlog is sustained for this duration, add more executors + * This is used only after the initial backlog timeout is exceeded + * + * spark.dynamicAllocation.executorIdleTimeout (K) - + * If an executor has been idle for this duration, remove it + */ +private[spark] class ExecutorAllocationManager( + client: ExecutorAllocationClient, + listenerBus: LiveListenerBus, + conf: SparkConf) + extends Logging { + + allocationManager => + + import ExecutorAllocationManager._ + + // Lower and upper bounds on the number of executors. + private val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) + private val maxNumExecutors = conf.getInt("spark.dynamicAllocation.maxExecutors", + Integer.MAX_VALUE) + + // How long there must be backlogged tasks for before an addition is triggered + private val schedulerBacklogTimeout = conf.getLong( + "spark.dynamicAllocation.schedulerBacklogTimeout", 60) + + // Same as above, but used only after `schedulerBacklogTimeout` is exceeded + private val sustainedSchedulerBacklogTimeout = conf.getLong( + "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout", schedulerBacklogTimeout) + + // How long an executor must be idle for before it is removed + private val executorIdleTimeout = conf.getLong( + "spark.dynamicAllocation.executorIdleTimeout", 600) + + // During testing, the methods to actually kill and add executors are mocked out + private val testing = conf.getBoolean("spark.dynamicAllocation.testing", false) + + // TODO: The default value of 1 for spark.executor.cores works right now because dynamic + // allocation is only supported for YARN and the default number of cores per executor in YARN is + // 1, but it might need to be attained differently for different cluster managers + private val tasksPerExecutor = + conf.getInt("spark.executor.cores", 1) / conf.getInt("spark.task.cpus", 1) + + validateSettings() + + // Number of executors to add in the next round + private var numExecutorsToAdd = 1 + + // Number of executors that have been requested but have not registered yet + private var numExecutorsPending = 0 + + // Executors that have been requested to be removed but have not been killed yet + private val executorsPendingToRemove = new mutable.HashSet[String] + + // All known executors + private val executorIds = new mutable.HashSet[String] + + // A timestamp of when an addition should be triggered, or NOT_SET if it is not set + // This is set when pending tasks are added but not scheduled yet + private var addTime: Long = NOT_SET + + // A timestamp for each executor of when the executor should be removed, indexed by the ID + // This is set when an executor is no longer running a task, or when it first registers + private val removeTimes = new mutable.HashMap[String, Long] + + // Polling loop interval (ms) + private val intervalMillis: Long = 100 + + // Clock used to schedule when executors should be added and removed + private var clock: Clock = new 
RealClock + + // Listener for Spark events that impact the allocation policy + private val listener = new ExecutorAllocationListener + + /** + * Verify that the settings specified through the config are valid. + * If not, throw an appropriate exception. + */ + private def validateSettings(): Unit = { + if (minNumExecutors < 0 || maxNumExecutors < 0) { + throw new SparkException("spark.dynamicAllocation.{min/max}Executors must be positive!") + } + if (maxNumExecutors == 0) { + throw new SparkException("spark.dynamicAllocation.maxExecutors cannot be 0!") + } + if (minNumExecutors > maxNumExecutors) { + throw new SparkException(s"spark.dynamicAllocation.minExecutors ($minNumExecutors) must " + + s"be less than or equal to spark.dynamicAllocation.maxExecutors ($maxNumExecutors)!") + } + if (schedulerBacklogTimeout <= 0) { + throw new SparkException("spark.dynamicAllocation.schedulerBacklogTimeout must be > 0!") + } + if (sustainedSchedulerBacklogTimeout <= 0) { + throw new SparkException( + "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout must be > 0!") + } + if (executorIdleTimeout <= 0) { + throw new SparkException("spark.dynamicAllocation.executorIdleTimeout must be > 0!") + } + // Require external shuffle service for dynamic allocation + // Otherwise, we may lose shuffle files when killing executors + if (!conf.getBoolean("spark.shuffle.service.enabled", false) && !testing) { + throw new SparkException("Dynamic allocation of executors requires the external " + + "shuffle service. You may enable this through spark.shuffle.service.enabled.") + } + if (tasksPerExecutor == 0) { + throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.") + } + } + + /** + * Use a different clock for this allocation manager. This is mainly used for testing. + */ + def setClock(newClock: Clock): Unit = { + clock = newClock + } + + /** + * Register for scheduler callbacks to decide when to add and remove executors. + */ + def start(): Unit = { + listenerBus.addListener(listener) + startPolling() + } + + /** + * Start the main polling thread that keeps track of when to add and remove executors. + */ + private def startPolling(): Unit = { + val t = new Thread { + override def run(): Unit = { + while (true) { + try { + schedule() + } catch { + case e: Exception => logError("Exception in dynamic executor allocation thread!", e) + } + Thread.sleep(intervalMillis) + } + } + } + t.setName("spark-dynamic-executor-allocation") + t.setDaemon(true) + t.start() + } + + /** + * If the add time has expired, request new executors and refresh the add time. + * If the remove time for an existing executor has expired, kill the executor. + * This is factored out into its own method for testing. + */ + private def schedule(): Unit = synchronized { + val now = clock.getTimeMillis + if (addTime != NOT_SET && now >= addTime) { + addExecutors() + logDebug(s"Starting timer to add more executors (to " + + s"expire in $sustainedSchedulerBacklogTimeout seconds)") + addTime += sustainedSchedulerBacklogTimeout * 1000 + } + + removeTimes.retain { case (executorId, expireTime) => + val expired = now >= expireTime + if (expired) { + removeExecutor(executorId) + } + !expired + } + } + + /** + * Request a number of executors from the cluster manager. + * If the cap on the number of executors is reached, give up and reset the + * number of executors to add next round instead of continuing to double it. + * Return the number actually requested. 
+ */ + private def addExecutors(): Int = synchronized { + // Do not request more executors if we have already reached the upper bound + val numExistingExecutors = executorIds.size + numExecutorsPending + if (numExistingExecutors >= maxNumExecutors) { + logDebug(s"Not adding executors because there are already ${executorIds.size} " + + s"registered and $numExecutorsPending pending executor(s) (limit $maxNumExecutors)") + numExecutorsToAdd = 1 + return 0 + } + + // The number of executors needed to satisfy all pending tasks is the number of tasks pending + // divided by the number of tasks each executor can fit, rounded up. + val maxNumExecutorsPending = + (listener.totalPendingTasks() + tasksPerExecutor - 1) / tasksPerExecutor + if (numExecutorsPending >= maxNumExecutorsPending) { + logDebug(s"Not adding executors because there are already $numExecutorsPending " + + s"pending and pending tasks could only fill $maxNumExecutorsPending") + numExecutorsToAdd = 1 + return 0 + } + + // It's never useful to request more executors than could satisfy all the pending tasks, so + // cap request at that amount. + // Also cap request with respect to the configured upper bound. + val maxNumExecutorsToAdd = math.min( + maxNumExecutorsPending - numExecutorsPending, + maxNumExecutors - numExistingExecutors) + assert(maxNumExecutorsToAdd > 0) + + val actualNumExecutorsToAdd = math.min(numExecutorsToAdd, maxNumExecutorsToAdd) + + val newTotalExecutors = numExistingExecutors + actualNumExecutorsToAdd + val addRequestAcknowledged = testing || client.requestExecutors(actualNumExecutorsToAdd) + if (addRequestAcknowledged) { + logInfo(s"Requesting $actualNumExecutorsToAdd new executor(s) because " + + s"tasks are backlogged (new desired total will be $newTotalExecutors)") + numExecutorsToAdd = + if (actualNumExecutorsToAdd == numExecutorsToAdd) numExecutorsToAdd * 2 else 1 + numExecutorsPending += actualNumExecutorsToAdd + actualNumExecutorsToAdd + } else { + logWarning(s"Unable to reach the cluster manager " + + s"to request $actualNumExecutorsToAdd executors!") + 0 + } + } + + /** + * Request the cluster manager to remove the given executor. + * Return whether the request is received. 
+ */ + private def removeExecutor(executorId: String): Boolean = synchronized { + // Do not kill the executor if we are not aware of it (should never happen) + if (!executorIds.contains(executorId)) { + logWarning(s"Attempted to remove unknown executor $executorId!") + return false + } + + // Do not kill the executor again if it is already pending to be killed (should never happen) + if (executorsPendingToRemove.contains(executorId)) { + logWarning(s"Attempted to remove executor $executorId " + + s"when it is already pending to be removed!") + return false + } + + // Do not kill the executor if we have already reached the lower bound + val numExistingExecutors = executorIds.size - executorsPendingToRemove.size + if (numExistingExecutors - 1 < minNumExecutors) { + logDebug(s"Not removing idle executor $executorId because there are only " + + s"$numExistingExecutors executor(s) left (limit $minNumExecutors)") + return false + } + + // Send a request to the backend to kill this executor + val removeRequestAcknowledged = testing || client.killExecutor(executorId) + if (removeRequestAcknowledged) { + logInfo(s"Removing executor $executorId because it has been idle for " + + s"$executorIdleTimeout seconds (new desired total will be ${numExistingExecutors - 1})") + executorsPendingToRemove.add(executorId) + true + } else { + logWarning(s"Unable to reach the cluster manager to kill executor $executorId!") + false + } + } + + /** + * Callback invoked when the specified executor has been added. + */ + private def onExecutorAdded(executorId: String): Unit = synchronized { + if (!executorIds.contains(executorId)) { + executorIds.add(executorId) + // If an executor (call this executor X) is not removed because the lower bound + // has been reached, it will no longer be marked as idle. When new executors join, + // however, we are no longer at the lower bound, and so we must mark executor X + // as idle again so as not to forget that it is a candidate for removal. (see SPARK-4951) + executorIds.filter(listener.isExecutorIdle).foreach(onExecutorIdle) + logInfo(s"New executor $executorId has registered (new total is ${executorIds.size})") + if (numExecutorsPending > 0) { + numExecutorsPending -= 1 + logDebug(s"Decremented number of pending executors ($numExecutorsPending left)") + } + } else { + logWarning(s"Duplicate executor $executorId has registered") + } + } + + /** + * Callback invoked when the specified executor has been removed. + */ + private def onExecutorRemoved(executorId: String): Unit = synchronized { + if (executorIds.contains(executorId)) { + executorIds.remove(executorId) + removeTimes.remove(executorId) + logInfo(s"Existing executor $executorId has been removed (new total is ${executorIds.size})") + if (executorsPendingToRemove.contains(executorId)) { + executorsPendingToRemove.remove(executorId) + logDebug(s"Executor $executorId is no longer pending to " + + s"be removed (${executorsPendingToRemove.size} left)") + } + } else { + logWarning(s"Unknown executor $executorId has been removed!") + } + } + + /** + * Callback invoked when the scheduler receives new pending tasks. + * This sets a time in the future that decides when executors should be added + * if it is not already set. 
+ */ + private def onSchedulerBacklogged(): Unit = synchronized { + if (addTime == NOT_SET) { + logDebug(s"Starting timer to add executors because pending tasks " + + s"are building up (to expire in $schedulerBacklogTimeout seconds)") + addTime = clock.getTimeMillis + schedulerBacklogTimeout * 1000 + } + } + + /** + * Callback invoked when the scheduler queue is drained. + * This resets all variables used for adding executors. + */ + private def onSchedulerQueueEmpty(): Unit = synchronized { + logDebug(s"Clearing timer to add executors because there are no more pending tasks") + addTime = NOT_SET + numExecutorsToAdd = 1 + } + + /** + * Callback invoked when the specified executor is no longer running any tasks. + * This sets a time in the future that decides when this executor should be removed if + * the executor is not already marked as idle. + */ + private def onExecutorIdle(executorId: String): Unit = synchronized { + if (executorIds.contains(executorId)) { + if (!removeTimes.contains(executorId) && !executorsPendingToRemove.contains(executorId)) { + logDebug(s"Starting idle timer for $executorId because there are no more tasks " + + s"scheduled to run on the executor (to expire in $executorIdleTimeout seconds)") + removeTimes(executorId) = clock.getTimeMillis + executorIdleTimeout * 1000 + } + } else { + logWarning(s"Attempted to mark unknown executor $executorId idle") + } + } + + /** + * Callback invoked when the specified executor is now running a task. + * This resets all variables used for removing this executor. + */ + private def onExecutorBusy(executorId: String): Unit = synchronized { + logDebug(s"Clearing idle timer for $executorId because it is now running a task") + removeTimes.remove(executorId) + } + + /** + * A listener that notifies the given allocation manager of when to add and remove executors. + * + * This class is intentionally conservative in its assumptions about the relative ordering + * and consistency of events returned by the listener. For simplicity, it does not account + * for speculated tasks. 
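For intuition, a tiny sketch (not part of the patch; the bookkeeping contents are invented) of the pending-task estimate this listener maintains: the per-stage task count minus the task indices already seen to start, summed over running stages:

// Illustrative only, with made-up bookkeeping contents.
val stageIdToNumTasks = Map(1 -> 100, 2 -> 40)
val stageIdToTaskIndices = Map(1 -> Set(0, 1, 2))   // three tasks of stage 1 have started
val totalPending = stageIdToNumTasks.map { case (stageId, numTasks) =>
  numTasks - stageIdToTaskIndices.get(stageId).map(_.size).getOrElse(0)
}.sum                                               // (100 - 3) + (40 - 0) = 137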
+ */ + private class ExecutorAllocationListener extends SparkListener { + + private val stageIdToNumTasks = new mutable.HashMap[Int, Int] + private val stageIdToTaskIndices = new mutable.HashMap[Int, mutable.HashSet[Int]] + private val executorIdToTaskIds = new mutable.HashMap[String, mutable.HashSet[Long]] + + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + val stageId = stageSubmitted.stageInfo.stageId + val numTasks = stageSubmitted.stageInfo.numTasks + allocationManager.synchronized { + stageIdToNumTasks(stageId) = numTasks + allocationManager.onSchedulerBacklogged() + } + } + + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { + val stageId = stageCompleted.stageInfo.stageId + allocationManager.synchronized { + stageIdToNumTasks -= stageId + stageIdToTaskIndices -= stageId + + // If this is the last stage with pending tasks, mark the scheduler queue as empty + // This is needed in case the stage is aborted for any reason + if (stageIdToNumTasks.isEmpty) { + allocationManager.onSchedulerQueueEmpty() + } + } + } + + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + val stageId = taskStart.stageId + val taskId = taskStart.taskInfo.taskId + val taskIndex = taskStart.taskInfo.index + val executorId = taskStart.taskInfo.executorId + + allocationManager.synchronized { + // This guards against the race condition in which the `SparkListenerTaskStart` + // event is posted before the `SparkListenerBlockManagerAdded` event, which is + // possible because these events are posted in different threads. (see SPARK-4951) + if (!allocationManager.executorIds.contains(executorId)) { + allocationManager.onExecutorAdded(executorId) + } + + // If this is the last pending task, mark the scheduler queue as empty + stageIdToTaskIndices.getOrElseUpdate(stageId, new mutable.HashSet[Int]) += taskIndex + val numTasksScheduled = stageIdToTaskIndices(stageId).size + val numTasksTotal = stageIdToNumTasks.getOrElse(stageId, -1) + if (numTasksScheduled == numTasksTotal) { + // No more pending tasks for this stage + stageIdToNumTasks -= stageId + if (stageIdToNumTasks.isEmpty) { + allocationManager.onSchedulerQueueEmpty() + } + } + + // Mark the executor on which this task is scheduled as busy + executorIdToTaskIds.getOrElseUpdate(executorId, new mutable.HashSet[Long]) += taskId + allocationManager.onExecutorBusy(executorId) + } + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + val executorId = taskEnd.taskInfo.executorId + val taskId = taskEnd.taskInfo.taskId + allocationManager.synchronized { + // If the executor is no longer running scheduled any tasks, mark it as idle + if (executorIdToTaskIds.contains(executorId)) { + executorIdToTaskIds(executorId) -= taskId + if (executorIdToTaskIds(executorId).isEmpty) { + executorIdToTaskIds -= executorId + allocationManager.onExecutorIdle(executorId) + } + } + } + } + + override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = { + val executorId = blockManagerAdded.blockManagerId.executorId + if (executorId != SparkContext.DRIVER_IDENTIFIER) { + // This guards against the race condition in which the `SparkListenerTaskStart` + // event is posted before the `SparkListenerBlockManagerAdded` event, which is + // possible because these events are posted in different threads. 
(see SPARK-4951) + if (!allocationManager.executorIds.contains(executorId)) { + allocationManager.onExecutorAdded(executorId) + } + } + } + + override def onBlockManagerRemoved( + blockManagerRemoved: SparkListenerBlockManagerRemoved): Unit = { + allocationManager.onExecutorRemoved(blockManagerRemoved.blockManagerId.executorId) + } + + /** + * An estimate of the total number of pending tasks remaining for currently running stages. Does + * not account for tasks which may have failed and been resubmitted. + * + * Note: This is not thread-safe without the caller owning the `allocationManager` lock. + */ + def totalPendingTasks(): Int = { + stageIdToNumTasks.map { case (stageId, numTasks) => + numTasks - stageIdToTaskIndices.get(stageId).map(_.size).getOrElse(0) + }.sum + } + + /** + * Return true if an executor is not currently running a task, and false otherwise. + * + * Note: This is not thread-safe without the caller owning the `allocationManager` lock. + */ + def isExecutorIdle(executorId: String): Boolean = { + !executorIdToTaskIds.contains(executorId) + } + } + +} + +private object ExecutorAllocationManager { + val NOT_SET = Long.MaxValue +} + +/** + * An abstract clock for measuring elapsed time. + */ +private trait Clock { + def getTimeMillis: Long +} + +/** + * A clock backed by a monotonically increasing time source. + * The time returned by this clock does not correspond to any notion of wall-clock time. + */ +private class RealClock extends Clock { + override def getTimeMillis: Long = System.nanoTime / (1000 * 1000) +} + +/** + * A clock that allows the caller to customize the time. + * This is used mainly for testing. + */ +private class TestClock(startTimeMillis: Long) extends Clock { + private var time: Long = startTimeMillis + override def getTimeMillis: Long = time + def tick(ms: Long): Unit = { time += ms } +} diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala index d5c8f9d76c476..e97a7375a267b 100644 --- a/core/src/main/scala/org/apache/spark/FutureAction.scala +++ b/core/src/main/scala/org/apache/spark/FutureAction.scala @@ -210,7 +210,11 @@ class ComplexFutureAction[T] extends FutureAction[T] { } catch { case e: Exception => p.failure(e) } finally { - thread = null + // This lock guarantees when calling `thread.interrupt()` in `cancel`, + // thread won't be set to null. 
+ ComplexFutureAction.this.synchronized { + thread = null + } } } this diff --git a/core/src/main/scala/org/apache/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala index edc3889c9ae51..3f33332a81eaf 100644 --- a/core/src/main/scala/org/apache/spark/HttpFileServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala @@ -24,6 +24,7 @@ import com.google.common.io.Files import org.apache.spark.util.Utils private[spark] class HttpFileServer( + conf: SparkConf, securityManager: SecurityManager, requestedPort: Int = 0) extends Logging { @@ -35,13 +36,13 @@ private[spark] class HttpFileServer( var serverUri : String = null def initialize() { - baseDir = Utils.createTempDir() + baseDir = Utils.createTempDir(Utils.getLocalDir(conf), "httpd") fileDir = new File(baseDir, "files") jarDir = new File(baseDir, "jars") fileDir.mkdir() jarDir.mkdir() logInfo("HTTP File server directory is " + baseDir) - httpServer = new HttpServer(baseDir, securityManager, requestedPort, "HTTP file server") + httpServer = new HttpServer(conf, baseDir, securityManager, requestedPort, "HTTP file server") httpServer.start() serverUri = httpServer.uri logDebug("HTTP file server started at: " + serverUri) diff --git a/core/src/main/scala/org/apache/spark/HttpServer.scala b/core/src/main/scala/org/apache/spark/HttpServer.scala index 912558d0cab7d..09a9ccc226721 100644 --- a/core/src/main/scala/org/apache/spark/HttpServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpServer.scala @@ -19,6 +19,7 @@ package org.apache.spark import java.io.File +import org.eclipse.jetty.server.ssl.SslSocketConnector import org.eclipse.jetty.util.security.{Constraint, Password} import org.eclipse.jetty.security.authentication.DigestAuthenticator import org.eclipse.jetty.security.{ConstraintMapping, ConstraintSecurityHandler, HashLoginService} @@ -42,6 +43,7 @@ private[spark] class ServerStateException(message: String) extends Exception(mes * around a Jetty server. 
*/ private[spark] class HttpServer( + conf: SparkConf, resourceBase: File, securityManager: SecurityManager, requestedPort: Int = 0, @@ -57,7 +59,7 @@ private[spark] class HttpServer( } else { logInfo("Starting HTTP Server") val (actualServer, actualPort) = - Utils.startServiceOnPort[Server](requestedPort, doStart, serverName) + Utils.startServiceOnPort[Server](requestedPort, doStart, conf, serverName) server = actualServer port = actualPort } @@ -71,7 +73,10 @@ private[spark] class HttpServer( */ private def doStart(startPort: Int): (Server, Int) = { val server = new Server() - val connector = new SocketConnector + + val connector = securityManager.fileServerSSLOptions.createJettySslContextFactory() + .map(new SslSocketConnector(_)).getOrElse(new SocketConnector) + connector.setMaxIdleTime(60 * 1000) connector.setSoLingerTime(-1) connector.setPort(startPort) @@ -148,13 +153,14 @@ private[spark] class HttpServer( } /** - * Get the URI of this HTTP server (http://host:port) + * Get the URI of this HTTP server (http://host:port or https://host:port) */ def uri: String = { if (server == null) { throw new ServerStateException("Server is not started") } else { - "http://" + Utils.localIpAddress + ":" + port + val scheme = if (securityManager.fileServerSSLOptions.enabled) "https" else "http" + s"$scheme://${Utils.localIpAddress}:$port" } } } diff --git a/core/src/main/scala/org/apache/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala index d4f2624061e35..419d093d55643 100644 --- a/core/src/main/scala/org/apache/spark/Logging.scala +++ b/core/src/main/scala/org/apache/spark/Logging.scala @@ -118,15 +118,17 @@ trait Logging { // org.slf4j.impl.Log4jLoggerFactory, from the log4j 2.0 binding, currently // org.apache.logging.slf4j.Log4jLoggerFactory val usingLog4j12 = "org.slf4j.impl.Log4jLoggerFactory".equals(binderClass) - val log4j12Initialized = LogManager.getRootLogger.getAllAppenders.hasMoreElements - if (!log4j12Initialized && usingLog4j12) { - val defaultLogProps = "org/apache/spark/log4j-defaults.properties" - Option(Utils.getSparkClassLoader.getResource(defaultLogProps)) match { - case Some(url) => - PropertyConfigurator.configure(url) - System.err.println(s"Using Spark's default log4j profile: $defaultLogProps") - case None => - System.err.println(s"Spark was unable to load $defaultLogProps") + if (usingLog4j12) { + val log4j12Initialized = LogManager.getRootLogger.getAllAppenders.hasMoreElements + if (!log4j12Initialized) { + val defaultLogProps = "org/apache/spark/log4j-defaults.properties" + Option(Utils.getSparkClassLoader.getResource(defaultLogProps)) match { + case Some(url) => + PropertyConfigurator.configure(url) + System.err.println(s"Using Spark's default log4j profile: $defaultLogProps") + case None => + System.err.println(s"Spark was unable to load $defaultLogProps") + } } } Logging.initialized = true diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 4cb0bd4142435..6e4edc7c80d7a 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -72,20 +72,22 @@ private[spark] class MapOutputTrackerMasterActor(tracker: MapOutputTrackerMaster /** * Class that keeps track of the location of the map output of * a stage. This is abstract because different versions of MapOutputTracker - * (driver and worker) use different HashMap to store its metadata. 
+ * (driver and executor) use different HashMap to store its metadata. */ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging { private val timeout = AkkaUtils.askTimeout(conf) + private val retryAttempts = AkkaUtils.numRetries(conf) + private val retryIntervalMs = AkkaUtils.retryWaitMs(conf) /** Set to the MapOutputTrackerActor living on the driver. */ var trackerActor: ActorRef = _ /** - * This HashMap has different behavior for the master and the workers. + * This HashMap has different behavior for the driver and the executors. * - * On the master, it serves as the source of map outputs recorded from ShuffleMapTasks. - * On the workers, it simply serves as a cache, in which a miss triggers a fetch from the - * master's corresponding HashMap. + * On the driver, it serves as the source of map outputs recorded from ShuffleMapTasks. + * On the executors, it simply serves as a cache, in which a miss triggers a fetch from the + * driver's corresponding HashMap. * * Note: because mapStatuses is accessed concurrently, subclasses should make sure it's a * thread-safe map. @@ -99,7 +101,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging protected var epoch: Long = 0 protected val epochLock = new AnyRef - /** Remembers which map output locations are currently being fetched on a worker. */ + /** Remembers which map output locations are currently being fetched on an executor. */ private val fetching = new HashSet[Int] /** @@ -108,8 +110,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging */ protected def askTracker(message: Any): Any = { try { - val future = trackerActor.ask(message)(timeout) - Await.result(future, timeout) + AkkaUtils.askWithReply(message, trackerActor, retryAttempts, retryIntervalMs, timeout) } catch { case e: Exception => logError("Error communicating with MapOutputTracker", e) @@ -136,14 +137,12 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them") var fetchedStatuses: Array[MapStatus] = null fetching.synchronized { - if (fetching.contains(shuffleId)) { - // Someone else is fetching it; wait for them to be done - while (fetching.contains(shuffleId)) { - try { - fetching.wait() - } catch { - case e: InterruptedException => - } + // Someone else is fetching it; wait for them to be done + while (fetching.contains(shuffleId)) { + try { + fetching.wait() + } catch { + case e: InterruptedException => } } @@ -178,6 +177,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, fetchedStatuses) } } else { + logError("Missing all output locations for shuffle " + shuffleId) throw new MetadataFetchFailedException( shuffleId, reduceId, "Missing all output locations for shuffle " + shuffleId) } @@ -197,8 +197,8 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging /** * Called from executors to update the epoch number, potentially clearing old outputs - * because of a fetch failure. Each worker task calls this with the latest epoch - * number on the master at the time it was created. + * because of a fetch failure. Each executor task calls this with the latest epoch + * number on the driver at the time it was created. 
*/ def updateEpoch(newEpoch: Long) { epochLock.synchronized { @@ -230,7 +230,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf) private var cacheEpoch = epoch /** - * Timestamp based HashMap for storing mapStatuses and cached serialized statuses in the master, + * Timestamp based HashMap for storing mapStatuses and cached serialized statuses in the driver, * so that statuses are dropped only by explicit de-registering or by TTL-based cleaning (if set). * Other than these two scenarios, nothing should be dropped from this HashMap. */ @@ -340,7 +340,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf) } /** - * MapOutputTracker for the workers, which fetches map output information from the driver's + * MapOutputTracker for the executors, which fetches map output information from the driver's * MapOutputTrackerMaster. */ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTracker(conf) { @@ -348,7 +348,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr new ConcurrentHashMap[Int, Array[MapStatus]] } -private[spark] object MapOutputTracker { +private[spark] object MapOutputTracker extends Logging { // Serialize an array of map output locations into an efficient byte format so that we can send // it to reduce tasks. We do this by compressing the serialized bytes using GZIP. They will @@ -381,6 +381,7 @@ private[spark] object MapOutputTracker { statuses.map { status => if (status == null) { + logError("Missing an output location for shuffle " + shuffleId) throw new MetadataFetchFailedException( shuffleId, reduceId, "Missing an output location for shuffle " + shuffleId) } else { diff --git a/core/src/main/scala/org/apache/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala index 27892dbd2a0bc..dd3f28e4197e3 100644 --- a/core/src/main/scala/org/apache/spark/Partition.scala +++ b/core/src/main/scala/org/apache/spark/Partition.scala @@ -18,11 +18,11 @@ package org.apache.spark /** - * A partition of an RDD. + * An identifier for a partition in an RDD. 
*/ trait Partition extends Serializable { /** - * Get the split's index within its parent RDD + * Get the partition's index within its parent RDD */ def index: Int diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 37053bb6f37ad..e53a78ead2c0e 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -204,7 +204,7 @@ class RangePartitioner[K : Ordering : ClassTag, V]( } @throws(classOf[IOException]) - private def writeObject(out: ObjectOutputStream) { + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { val sfactory = SparkEnv.get.serializer sfactory match { case js: JavaSerializer => out.defaultWriteObject() @@ -222,7 +222,7 @@ class RangePartitioner[K : Ordering : ClassTag, V]( } @throws(classOf[IOException]) - private def readObject(in: ObjectInputStream) { + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val sfactory = SparkEnv.get.serializer sfactory match { case js: JavaSerializer => in.defaultReadObject() diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala new file mode 100644 index 0000000000000..2cdc167f85af0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import java.io.File + +import com.typesafe.config.{Config, ConfigFactory, ConfigValueFactory} +import org.eclipse.jetty.util.ssl.SslContextFactory + +/** + * SSLOptions class is a common container for SSL configuration options. It offers methods to + * generate specific objects to configure SSL for different communication protocols. + * + * SSLOptions is intended to provide the maximum common set of SSL settings, which are supported + * by the protocol, which it can generate the configuration for. Since Akka doesn't support client + * authentication with SSL, SSLOptions cannot support it either. 
+ * + * @param enabled enables or disables SSL; if it is set to false, the rest of the + * settings are disregarded + * @param keyStore a path to the key-store file + * @param keyStorePassword a password to access the key-store file + * @param keyPassword a password to access the private key in the key-store + * @param trustStore a path to the trust-store file + * @param trustStorePassword a password to access the trust-store file + * @param protocol SSL protocol (remember that SSLv3 was compromised) supported by Java + * @param enabledAlgorithms a set of encryption algorithms to use + */ +private[spark] case class SSLOptions( + enabled: Boolean = false, + keyStore: Option[File] = None, + keyStorePassword: Option[String] = None, + keyPassword: Option[String] = None, + trustStore: Option[File] = None, + trustStorePassword: Option[String] = None, + protocol: Option[String] = None, + enabledAlgorithms: Set[String] = Set.empty) { + + /** + * Creates a Jetty SSL context factory according to the SSL settings represented by this object. + */ + def createJettySslContextFactory(): Option[SslContextFactory] = { + if (enabled) { + val sslContextFactory = new SslContextFactory() + + keyStore.foreach(file => sslContextFactory.setKeyStorePath(file.getAbsolutePath)) + trustStore.foreach(file => sslContextFactory.setTrustStore(file.getAbsolutePath)) + keyStorePassword.foreach(sslContextFactory.setKeyStorePassword) + trustStorePassword.foreach(sslContextFactory.setTrustStorePassword) + keyPassword.foreach(sslContextFactory.setKeyManagerPassword) + protocol.foreach(sslContextFactory.setProtocol) + sslContextFactory.setIncludeCipherSuites(enabledAlgorithms.toSeq: _*) + + Some(sslContextFactory) + } else { + None + } + } + + /** + * Creates an Akka configuration object which contains all the SSL settings represented by this + * object. It can be used then to compose the ultimate Akka configuration. + */ + def createAkkaConfig: Option[Config] = { + import scala.collection.JavaConversions._ + if (enabled) { + Some(ConfigFactory.empty() + .withValue("akka.remote.netty.tcp.security.key-store", + ConfigValueFactory.fromAnyRef(keyStore.map(_.getAbsolutePath).getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.key-store-password", + ConfigValueFactory.fromAnyRef(keyStorePassword.getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.trust-store", + ConfigValueFactory.fromAnyRef(trustStore.map(_.getAbsolutePath).getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.trust-store-password", + ConfigValueFactory.fromAnyRef(trustStorePassword.getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.key-password", + ConfigValueFactory.fromAnyRef(keyPassword.getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.random-number-generator", + ConfigValueFactory.fromAnyRef("")) + .withValue("akka.remote.netty.tcp.security.protocol", + ConfigValueFactory.fromAnyRef(protocol.getOrElse(""))) + .withValue("akka.remote.netty.tcp.security.enabled-algorithms", + ConfigValueFactory.fromIterable(enabledAlgorithms.toSeq)) + .withValue("akka.remote.netty.tcp.enable-ssl", + ConfigValueFactory.fromAnyRef(true))) + } else { + None + } + } + + /** Returns a string representation of this SSLOptions with all the passwords masked. 
*/ + override def toString: String = s"SSLOptions{enabled=$enabled, " + + s"keyStore=$keyStore, keyStorePassword=${keyStorePassword.map(_ => "xxx")}, " + + s"trustStore=$trustStore, trustStorePassword=${trustStorePassword.map(_ => "xxx")}, " + + s"protocol=$protocol, enabledAlgorithms=$enabledAlgorithms}" + +} + +private[spark] object SSLOptions extends Logging { + + /** Resolves SSLOptions settings from a given Spark configuration object at a given namespace. + * + * The following settings are allowed: + * $ - `[ns].enabled` - `true` or `false`, to enable or disable SSL respectively + * $ - `[ns].keyStore` - a path to the key-store file; can be relative to the current directory + * $ - `[ns].keyStorePassword` - a password to the key-store file + * $ - `[ns].keyPassword` - a password to the private key + * $ - `[ns].trustStore` - a path to the trust-store file; can be relative to the current + * directory + * $ - `[ns].trustStorePassword` - a password to the trust-store file + * $ - `[ns].protocol` - a protocol name supported by a particular Java version + * $ - `[ns].enabledAlgorithms` - a comma separated list of ciphers + * + * For a list of protocols and ciphers supported by particular Java versions, you may go to + * [[https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https Oracle + * blog page]]. + * + * You can optionally specify the default configuration. If you do, for each setting which is + * missing in SparkConf, the corresponding setting is used from the default configuration. + * + * @param conf Spark configuration object where the settings are collected from + * @param ns the namespace name + * @param defaults the default configuration + * @return [[org.apache.spark.SSLOptions]] object + */ + def parse(conf: SparkConf, ns: String, defaults: Option[SSLOptions] = None): SSLOptions = { + val enabled = conf.getBoolean(s"$ns.enabled", defaultValue = defaults.exists(_.enabled)) + + val keyStore = conf.getOption(s"$ns.keyStore").map(new File(_)) + .orElse(defaults.flatMap(_.keyStore)) + + val keyStorePassword = conf.getOption(s"$ns.keyStorePassword") + .orElse(defaults.flatMap(_.keyStorePassword)) + + val keyPassword = conf.getOption(s"$ns.keyPassword") + .orElse(defaults.flatMap(_.keyPassword)) + + val trustStore = conf.getOption(s"$ns.trustStore").map(new File(_)) + .orElse(defaults.flatMap(_.trustStore)) + + val trustStorePassword = conf.getOption(s"$ns.trustStorePassword") + .orElse(defaults.flatMap(_.trustStorePassword)) + + val protocol = conf.getOption(s"$ns.protocol") + .orElse(defaults.flatMap(_.protocol)) + + val enabledAlgorithms = conf.getOption(s"$ns.enabledAlgorithms") + .map(_.split(",").map(_.trim).filter(_.nonEmpty).toSet) + .orElse(defaults.map(_.enabledAlgorithms)) + .getOrElse(Set.empty) + + new SSLOptions( + enabled, + keyStore, + keyStorePassword, + keyPassword, + trustStore, + trustStorePassword, + protocol, + enabledAlgorithms) + } + +} + diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index 0e0f1a7b2377e..88d35a4bacc6e 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -18,10 +18,15 @@ package org.apache.spark import java.net.{Authenticator, PasswordAuthentication} +import java.security.KeyStore +import java.security.cert.X509Certificate +import javax.net.ssl._ +import com.google.common.io.Files import org.apache.hadoop.io.Text import 
org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.network.sasl.SecretKeyHolder /** * Spark class responsible for security. @@ -54,7 +59,7 @@ import org.apache.spark.deploy.SparkHadoopUtil * Spark also has a set of admin acls (`spark.admin.acls`) which is a set of users/administrators * who always have permission to view or modify the Spark application. * - * Spark does not currently support encryption after authentication. + * Starting from version 1.3, Spark has partial support for encrypted connections with SSL. * * At this point spark has multiple communication protocols that need to be secured and * different underlying mechanisms are used depending on the protocol: @@ -66,8 +71,9 @@ import org.apache.spark.deploy.SparkHadoopUtil * to connect to the server. There is no control of the underlying * authentication mechanism so its not clear if the password is passed in * plaintext or uses DIGEST-MD5 or some other mechanism. - * Akka also has an option to turn on SSL, this option is not currently supported - * but we could add a configuration option in the future. + * + * Akka also has an option to turn on SSL; this option is currently supported (see + * the details below). * * - HTTP for broadcast and file server (via HttpServer) -> Spark currently uses Jetty * for the HttpServer. Jetty supports multiple authentication mechanisms - @@ -76,15 +82,16 @@ import org.apache.spark.deploy.SparkHadoopUtil * to authenticate using DIGEST-MD5 via a single user and the shared secret. * Since we are using DIGEST-MD5, the shared secret is not passed on the wire * in plaintext. - * We currently do not support SSL (https), but Jetty can be configured to use it - * so we could add a configuration option for this in the future. + * + * We currently support SSL (https) for this communication protocol (see the details + * below). * * The Spark HttpServer installs the HashLoginServer and configures it to DIGEST-MD5. * Any clients must specify the user and password. There is a default * Authenticator installed in the SecurityManager to how it does the authentication * and in this case gets the user name and password from the request. * - * - ConnectionManager -> The Spark ConnectionManager uses java nio to asynchronously + * - BlockTransferService -> The Spark BlockTransferService uses java nio to asynchronously * exchange messages. For this we use the Java SASL * (Simple Authentication and Security Layer) API and again use DIGEST-MD5 * as the authentication mechanism. This means the shared secret is not passed @@ -92,31 +99,35 @@ import org.apache.spark.deploy.SparkHadoopUtil * Note that SASL is pluggable as to what mechanism it uses. We currently use * DIGEST-MD5 but this could be changed to use Kerberos or other in the future. * Spark currently supports "auth" for the quality of protection, which means - * the connection is not supporting integrity or privacy protection (encryption) + * the connection does not support integrity or privacy protection (encryption) * after authentication. SASL also supports "auth-int" and "auth-conf" which - * SPARK could be support in the future to allow the user to specify the quality + * SPARK could support in the future to allow the user to specify the quality * of protection they want. If we support those, the messages will also have to * be wrapped and unwrapped via the SaslServer/SaslClient.wrap/unwrap API's. 
* - * Since the connectionManager does asynchronous messages passing, the SASL + * Since the NioBlockTransferService does asynchronous message passing, the SASL * authentication is a bit more complex. A ConnectionManager can be both a client - * and a Server, so for a particular connection is has to determine what to do. + * and a Server, so for a particular connection it has to determine what to do. * A ConnectionId was added to be able to track connections and is used to * match up incoming messages with connections waiting for authentication. - * The ConnectionManager tracks all the sendingConnections using the ConnectionId - * and waits for the response from the server and does the handshake before sending + * The ConnectionManager tracks all the sendingConnections using the ConnectionId, + * waits for the response from the server, and does the handshake before sending * the real message. * + * The NettyBlockTransferService ensures that SASL authentication is performed + * synchronously prior to any other communication on a connection. This is done in + * SaslClientBootstrap on the client side and SaslRpcHandler on the server side. + * * - HTTP for the Spark UI -> the UI was changed to use servlets so that javax servlet filters * can be used. Yarn requires a specific AmIpFilter be installed for security to work - * properly. For non-Yarn deployments, users can write a filter to go through a - * companies normal login service. If an authentication filter is in place then the + * properly. For non-Yarn deployments, users can write a filter to go through their + * organization's normal login service. If an authentication filter is in place then the * SparkUI can be configured to check the logged in user against the list of users who * have view acls to see if that user is authorized. * The filters can also be used for many different purposes. For instance filters * could be used for logging, encryption, or compression. * - * The exact mechanisms used to generate/distributed the shared secret is deployment specific. + * The exact mechanisms used to generate/distribute the shared secret are deployment-specific. * * For Yarn deployments, the secret is automatically generated using the Akka remote * Crypt.generateSecureCookie() API. The secret is placed in the Hadoop UGI which gets passed @@ -133,21 +144,52 @@ import org.apache.spark.deploy.SparkHadoopUtil * All the nodes (Master and Workers) and the applications need to have the same shared secret. * This again is not ideal as one user could potentially affect another users application. * This should be enhanced in the future to provide better protection. - * If the UI needs to be secured the user needs to install a javax servlet filter to do the + * If the UI needs to be secure, the user needs to install a javax servlet filter to do the * authentication. Spark will then use that user to compare against the view acls to do * authorization. If not filter is in place the user is generally null and no authorization * can take place. + * + * Connection encryption (SSL) configuration is organized hierarchically. The user can configure + * the default SSL settings which will be used for all the supported communication protocols unless + * they are overwritten by protocol specific settings. This way the user can easily provide the + * common settings for all the protocols without disabling the ability to configure each one + * individually. 
+ * + * All the SSL settings like `spark.ssl.xxx` where `xxx` is a particular configuration property, + * denote the global configuration for all the supported protocols. In order to override the global + * configuration for the particular protocol, the properties must be overwritten in the + * protocol-specific namespace. Use `spark.ssl.yyy.xxx` settings to overwrite the global + * configuration for particular protocol denoted by `yyy`. Currently `yyy` can be either `akka` for + * Akka based connections or `fs` for broadcast and file server. + * + * Refer to [[org.apache.spark.SSLOptions]] documentation for the list of + * options that can be specified. + * + * SecurityManager initializes SSLOptions objects for different protocols separately. SSLOptions + * object parses Spark configuration at a given namespace and builds the common representation + * of SSL settings. SSLOptions is then used to provide protocol-specific configuration like + * TypeSafe configuration for Akka or SSLContextFactory for Jetty. + * + * SSL must be configured on each node and configured for each component involved in + * communication using the particular protocol. In YARN clusters, the key-store can be prepared on + * the client side then distributed and used by the executors as the part of the application + * (YARN allows the user to deploy files before the application is started). + * In standalone deployment, the user needs to provide key-stores and configuration + * options for master and workers. In this mode, the user may allow the executors to use the SSL + * settings inherited from the worker which spawned that executor. It can be accomplished by + * setting `spark.ssl.useNodeLocalConf` to `true`. */ -private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging { +private[spark] class SecurityManager(sparkConf: SparkConf) + extends Logging with SecretKeyHolder { // key used to store the spark secret in the Hadoop UGI private val sparkSecretLookupKey = "sparkCookie" private val authOn = sparkConf.getBoolean("spark.authenticate", false) // keep spark.ui.acls.enable for backwards compatibility with 1.0 - private var aclsOn = sparkConf.getOption("spark.acls.enable").getOrElse( - sparkConf.get("spark.ui.acls.enable", "false")).toBoolean + private var aclsOn = + sparkConf.getBoolean("spark.acls.enable", sparkConf.getBoolean("spark.ui.acls.enable", false)) // admin acls should be set before view or modify acls private var adminAcls: Set[String] = @@ -191,6 +233,57 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging { ) } + // the default SSL configuration - it will be used by all communication layers unless overwritten + private val defaultSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl", defaults = None) + + // SSL configuration for different communication layers - they can override the default + // configuration at a specified namespace. The namespace *must* start with spark.ssl. 
+ val fileServerSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl.fs", Some(defaultSSLOptions)) + val akkaSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl.akka", Some(defaultSSLOptions)) + + logDebug(s"SSLConfiguration for file server: $fileServerSSLOptions") + logDebug(s"SSLConfiguration for Akka: $akkaSSLOptions") + + val (sslSocketFactory, hostnameVerifier) = if (fileServerSSLOptions.enabled) { + val trustStoreManagers = + for (trustStore <- fileServerSSLOptions.trustStore) yield { + val input = Files.asByteSource(fileServerSSLOptions.trustStore.get).openStream() + + try { + val ks = KeyStore.getInstance(KeyStore.getDefaultType) + ks.load(input, fileServerSSLOptions.trustStorePassword.get.toCharArray) + + val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm) + tmf.init(ks) + tmf.getTrustManagers + } finally { + input.close() + } + } + + lazy val credulousTrustStoreManagers = Array({ + logWarning("Using 'accept-all' trust manager for SSL connections.") + new X509TrustManager { + override def getAcceptedIssuers: Array[X509Certificate] = null + + override def checkClientTrusted(x509Certificates: Array[X509Certificate], s: String) {} + + override def checkServerTrusted(x509Certificates: Array[X509Certificate], s: String) {} + }: TrustManager + }) + + val sslContext = SSLContext.getInstance(fileServerSSLOptions.protocol.getOrElse("Default")) + sslContext.init(null, trustStoreManagers.getOrElse(credulousTrustStoreManagers), null) + + val hostVerifier = new HostnameVerifier { + override def verify(s: String, sslSession: SSLSession): Boolean = true + } + + (Some(sslContext.getSocketFactory), Some(hostVerifier)) + } else { + (None, None) + } + /** * Split a comma separated String, filter out any empty items, and return a Set of strings */ @@ -337,4 +430,8 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging { * @return the secret key as a String if authentication is enabled, otherwise returns null */ def getSecretKey(): String = secretKey + + // Default SecurityManager only has a single secret key, so ignore appId. 
+ override def getSaslUser(appId: String): String = getSaslUser() + override def getSecretKey(appId: String): String = getSecretKey() } diff --git a/core/src/main/scala/org/apache/spark/SerializableWritable.scala b/core/src/main/scala/org/apache/spark/SerializableWritable.scala index e50b9ac2291f9..55cb25946c2ad 100644 --- a/core/src/main/scala/org/apache/spark/SerializableWritable.scala +++ b/core/src/main/scala/org/apache/spark/SerializableWritable.scala @@ -24,18 +24,19 @@ import org.apache.hadoop.io.ObjectWritable import org.apache.hadoop.io.Writable import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.util.Utils @DeveloperApi class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable { def value = t override def toString = t.toString - private def writeObject(out: ObjectOutputStream) { + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() new ObjectWritable(t).write(out) } - private def readObject(in: ObjectInputStream) { + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() val ow = new ObjectWritable() ow.setConf(new Configuration()) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index dbbcc23305c50..13aa9960ac33a 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -17,9 +17,13 @@ package org.apache.spark +import java.util.concurrent.ConcurrentHashMap + import scala.collection.JavaConverters._ -import scala.collection.mutable.{HashMap, LinkedHashSet} +import scala.collection.mutable.LinkedHashSet + import org.apache.spark.serializer.KryoSerializer +import org.apache.spark.util.Utils /** * Configuration for a Spark application. Used to set various Spark parameters as key-value pairs. 
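To make the hierarchical SSL configuration used by SecurityManager above concrete, here is a minimal sketch (not part of the patch) of how protocol-specific settings fall back to the global `spark.ssl.*` namespace. The key-store paths and the password are hypothetical placeholders, and since SSLOptions is `private[spark]`, the sketch assumes it is compiled inside the `org.apache.spark` package:

package org.apache.spark

object SslConfigSketch {
  def main(args: Array[String]): Unit = {
    // Global SSL defaults plus a single file-server-specific override.
    val conf = new SparkConf(loadDefaults = false)
      .set("spark.ssl.enabled", "true")
      .set("spark.ssl.keyStore", "/path/to/global-keystore")   // hypothetical path
      .set("spark.ssl.keyStorePassword", "secret")             // hypothetical password
      .set("spark.ssl.protocol", "TLSv1")
      .set("spark.ssl.fs.keyStore", "/path/to/fs-keystore")    // hypothetical override for the file server

    // Parse the global namespace first, then use it as the defaults for the protocol namespace.
    val defaultSSLOptions = SSLOptions.parse(conf, "spark.ssl", defaults = None)
    val fileServerSSLOptions = SSLOptions.parse(conf, "spark.ssl.fs", Some(defaultSSLOptions))

    println(fileServerSSLOptions.keyStore)   // Some(/path/to/fs-keystore), taken from spark.ssl.fs.*
    println(fileServerSSLOptions.protocol)   // Some(TLSv1), inherited from spark.ssl.*
  }
}

The same resolution order applies to the `spark.ssl.akka` namespace, so one set of global properties can serve both protocols.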
@@ -46,12 +50,12 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { /** Create a SparkConf that loads defaults from system properties and the classpath */ def this() = this(true) - private[spark] val settings = new HashMap[String, String]() + private val settings = new ConcurrentHashMap[String, String]() if (loadDefaults) { // Load any spark.* system properties - for ((k, v) <- System.getProperties.asScala if k.startsWith("spark.")) { - settings(k) = v + for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) { + set(key, value) } } @@ -61,9 +65,9 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { throw new NullPointerException("null key") } if (value == null) { - throw new NullPointerException("null value") + throw new NullPointerException("null value for " + key) } - settings(key) = value + settings.put(key, value) this } @@ -129,15 +133,13 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { /** Set multiple parameters together */ def setAll(settings: Traversable[(String, String)]) = { - this.settings ++= settings + this.settings.putAll(settings.toMap.asJava) this } /** Set a parameter if it isn't already configured */ def setIfMissing(key: String, value: String): SparkConf = { - if (!settings.contains(key)) { - settings(key) = value - } + settings.putIfAbsent(key, value) this } @@ -163,21 +165,23 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { /** Get a parameter; throws a NoSuchElementException if it's not set */ def get(key: String): String = { - settings.getOrElse(key, throw new NoSuchElementException(key)) + getOption(key).getOrElse(throw new NoSuchElementException(key)) } /** Get a parameter, falling back to a default if not set */ def get(key: String, defaultValue: String): String = { - settings.getOrElse(key, defaultValue) + getOption(key).getOrElse(defaultValue) } /** Get a parameter as an Option */ def getOption(key: String): Option[String] = { - settings.get(key) + Option(settings.get(key)) } /** Get all parameters as a list of pairs */ - def getAll: Array[(String, String)] = settings.clone().toArray + def getAll: Array[(String, String)] = { + settings.entrySet().asScala.map(x => (x.getKey, x.getValue)).toArray + } /** Get a parameter as an integer, falling back to a default if not set */ def getInt(key: String, defaultValue: Int): Int = { @@ -217,12 +221,18 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { */ getAll.filter { case (k, _) => isAkkaConf(k) } + /** + * Returns the Spark application id, valid in the Driver after TaskScheduler registration and + * from the start in the Executor. + */ + def getAppId: String = get("spark.app.id") + /** Does the configuration contain a given parameter? */ - def contains(key: String): Boolean = settings.contains(key) + def contains(key: String): Boolean = settings.containsKey(key) /** Copy this object */ override def clone: SparkConf = { - new SparkConf(false).setAll(settings) + new SparkConf(false).setAll(getAll) } /** @@ -234,7 +244,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { /** Checks for illegal or deprecated config settings. Throws an exception for the former. Not * idempotent - may mutate this conf object to convert deprecated settings to supported ones. 
*/ private[spark] def validateSettings() { - if (settings.contains("spark.local.dir")) { + if (contains("spark.local.dir")) { val msg = "In Spark 1.0 and later spark.local.dir will be overridden by the value set by " + "the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN)." logWarning(msg) @@ -244,9 +254,22 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { val executorClasspathKey = "spark.executor.extraClassPath" val driverOptsKey = "spark.driver.extraJavaOptions" val driverClassPathKey = "spark.driver.extraClassPath" + val driverLibraryPathKey = "spark.driver.extraLibraryPath" + + // Used by Yarn in 1.1 and before + sys.props.get("spark.driver.libraryPath").foreach { value => + val warning = + s""" + |spark.driver.libraryPath was detected (set to '$value'). + |This is deprecated in Spark 1.2+. + | + |Please instead use: $driverLibraryPathKey + """.stripMargin + logWarning(warning) + } // Validate spark.executor.extraJavaOptions - settings.get(executorOptsKey).map { javaOpts => + getOption(executorOptsKey).map { javaOpts => if (javaOpts.contains("-Dspark")) { val msg = s"$executorOptsKey is not allowed to set Spark options (was '$javaOpts'). " + "Set them directly on a SparkConf or in a properties file when using ./bin/spark-submit." @@ -326,7 +349,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { * configuration out for debugging. */ def toDebugString: String = { - settings.toArray.sorted.map{case (k, v) => k + "=" + v}.mkString("\n") + getAll.sorted.map{case (k, v) => k + "=" + v}.mkString("\n") } } @@ -347,11 +370,14 @@ private[spark] object SparkConf { isAkkaConf(name) || name.startsWith("spark.akka") || name.startsWith("spark.auth") || + name.startsWith("spark.ssl") || isSparkPortConf(name) } /** - * Return whether the given config is a Spark port config. + * Return true if the given config matches either `spark.*.port` or `spark.port.*`. 
*/ - def isSparkPortConf(name: String): Boolean = name.startsWith("spark.") && name.endsWith(".port") + def isSparkPortConf(name: String): Boolean = { + (name.startsWith("spark.") && name.endsWith(".port")) || name.startsWith("spark.port.") + } } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index ac7935b8c231e..a7adddb6c83ec 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -20,10 +20,10 @@ package org.apache.spark import scala.language.implicitConversions import java.io._ +import java.lang.reflect.Constructor import java.net.URI -import java.util.Arrays +import java.util.{Arrays, Properties, UUID} import java.util.concurrent.atomic.AtomicInteger -import java.util.{Properties, UUID} import java.util.UUID.randomUUID import scala.collection.{Map, Set} import scala.collection.JavaConversions._ @@ -42,7 +42,8 @@ import akka.actor.Props import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} -import org.apache.spark.input.WholeTextFileInputFormat +import org.apache.spark.executor.TriggerThreadDump +import org.apache.spark.input.{StreamInputFormat, PortableDataStream, WholeTextFileInputFormat, FixedLengthBinaryInputFormat} import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} import org.apache.spark.rdd._ import org.apache.spark.scheduler._ @@ -50,24 +51,49 @@ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SparkD import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} import org.apache.spark.scheduler.local.LocalBackend import org.apache.spark.storage._ -import org.apache.spark.ui.SparkUI -import org.apache.spark.util.{CallSite, ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedWeakValueHashMap, Utils} +import org.apache.spark.ui.{SparkUI, ConsoleProgressBar} +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.util._ /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark * cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. * + * Only one SparkContext may be active per JVM. You must `stop()` the active SparkContext before + * creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. + * * @param config a Spark Config object describing the application configuration. Any settings in * this config overrides the default configs as well as system properties. */ +class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient { + + // The call site where this SparkContext was constructed. + private val creationSite: CallSite = Utils.getCallSite() -class SparkContext(config: SparkConf) extends Logging { + // If true, log warnings instead of throwing exceptions when multiple SparkContexts are active + private val allowMultipleContexts: Boolean = + config.getBoolean("spark.driver.allowMultipleContexts", false) + + // In order to prevent multiple SparkContexts from being active at the same time, mark this + // context as having started construction. + // NOTE: this must be placed at the beginning of the SparkContext constructor. 
+ SparkContext.markPartiallyConstructed(this, allowMultipleContexts) // This is used only by YARN for now, but should be relevant to other cluster types (Mesos, // etc) too. This is typically generated from InputFormatInfo.computePreferredLocations. It // contains a map from hostname to a list of input format splits on the host. private[spark] var preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map() + val startTime = System.currentTimeMillis() + + @volatile private var stopped: Boolean = false + + private def assertNotStopped(): Unit = { + if (stopped) { + throw new IllegalStateException("Cannot call methods on a stopped SparkContext") + } + } + /** * Create a SparkContext that loads settings from system properties (for instance, when * launching with ./bin/spark-submit). @@ -155,6 +181,9 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] def this(master: String, appName: String, sparkHome: String, jars: Seq[String]) = this(master, appName, sparkHome, jars, Map(), Map()) + // log out Spark Version in Spark driver log + logInfo(s"Running Spark version $SPARK_VERSION") + private[spark] val conf = config.clone() conf.validateSettings() @@ -209,16 +238,10 @@ class SparkContext(config: SparkConf) extends Logging { // An asynchronous listener bus for Spark events private[spark] val listenerBus = new LiveListenerBus + conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER) + // Create the Spark execution environment (cache, map output tracker, etc) - conf.set("spark.executor.id", "driver") - private[spark] val env = SparkEnv.create( - conf, - "", - conf.get("spark.driver.host"), - conf.get("spark.driver.port").toInt, - isDriver = true, - isLocal = isLocal, - listenerBus = listenerBus) + private[spark] val env = SparkEnv.createDriverEnv(conf, isLocal, listenerBus) SparkEnv.set(env) // Used to store a URL for each static file/jar together with the file's local timestamp @@ -230,10 +253,24 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] val metadataCleaner = new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, conf) - // Initialize the Spark UI, registering all associated listeners + + private[spark] val jobProgressListener = new JobProgressListener(conf) + listenerBus.addListener(jobProgressListener) + + val statusTracker = new SparkStatusTracker(this) + + private[spark] val progressBar: Option[ConsoleProgressBar] = + if (conf.getBoolean("spark.ui.showConsoleProgress", true) && !log.isInfoEnabled) { + Some(new ConsoleProgressBar(this)) + } else { + None + } + + // Initialize the Spark UI private[spark] val ui: Option[SparkUI] = if (conf.getBoolean("spark.ui.enabled", true)) { - Some(new SparkUI(this)) + Some(SparkUI.createLiveUI(this, conf, listenerBus, jobProgressListener, + env.securityManager,appName)) } else { // For tests, do not enable the UI None @@ -246,8 +283,6 @@ class SparkContext(config: SparkConf) extends Logging { /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. 
*/ val hadoopConfiguration = SparkHadoopUtil.get.newConfiguration(conf) - val startTime = System.currentTimeMillis() - // Add each JAR given through the constructor if (jars != null) { jars.foreach(addJar) @@ -295,15 +330,21 @@ class SparkContext(config: SparkConf) extends Logging { executorEnvs("SPARK_USER") = sparkUser // Create and start the scheduler - private[spark] var taskScheduler = SparkContext.createTaskScheduler(this, master) + private[spark] var (schedulerBackend, taskScheduler) = + SparkContext.createTaskScheduler(this, master) private val heartbeatReceiver = env.actorSystem.actorOf( Props(new HeartbeatReceiver(taskScheduler)), "HeartbeatReceiver") @volatile private[spark] var dagScheduler: DAGScheduler = _ try { dagScheduler = new DAGScheduler(this) } catch { - case e: Exception => throw - new SparkException("DAGScheduler cannot be initialized due to %s".format(e.getMessage)) + case e: Exception => { + try { + stop() + } finally { + throw new SparkException("Error while constructing DAGScheduler", e) + } + } } // start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's @@ -313,11 +354,15 @@ class SparkContext(config: SparkConf) extends Logging { val applicationId: String = taskScheduler.applicationId() conf.set("spark.app.id", applicationId) + env.blockManager.initialize(applicationId) + val metricsSystem = env.metricsSystem // The metrics system for Driver need to be set spark.app.id to app ID. // So it should start after we get app ID from the task scheduler and set spark.app.id. metricsSystem.start() + // Attach the driver metrics servlet handler to the web ui after the metrics system is started. + metricsSystem.getServletHandlers.foreach(handler => ui.foreach(_.attachHandler(handler))) // Optionally log Spark events private[spark] val eventLogger: Option[EventLoggingListener] = { @@ -330,8 +375,18 @@ class SparkContext(config: SparkConf) extends Logging { } else None } - // At this point, all relevant SparkListeners have been registered, so begin releasing events - listenerBus.start() + // Optionally scale number of executors dynamically based on workload. Exposed for testing. + private val dynamicAllocationEnabled = conf.getBoolean("spark.dynamicAllocation.enabled", false) + private val dynamicAllocationTesting = conf.getBoolean("spark.dynamicAllocation.testing", false) + private[spark] val executorAllocationManager: Option[ExecutorAllocationManager] = + if (dynamicAllocationEnabled) { + assert(master.contains("yarn") || dynamicAllocationTesting, + "Dynamic allocation of executors is currently only supported in YARN mode") + Some(new ExecutorAllocationManager(this, listenerBus, conf)) + } else { + None + } + executorAllocationManager.foreach(_.start()) private[spark] val cleaner: Option[ContextCleaner] = { if (conf.getBoolean("spark.cleaner.referenceTracking", true)) { @@ -342,6 +397,7 @@ class SparkContext(config: SparkConf) extends Logging { } cleaner.foreach(_.start()) + setupAndStartListenerBus() postEnvironmentUpdate() postApplicationStart() @@ -352,6 +408,29 @@ class SparkContext(config: SparkConf) extends Logging { override protected def childValue(parent: Properties): Properties = new Properties(parent) } + /** + * Called by the web UI to obtain executor thread dumps. This method may be expensive. + * Logs an error and returns None if we failed to obtain a thread dump, which could occur due + * to an executor being dead or unresponsive or due to network issues while sending the thread + * dump message back to the driver. 
+ */ + private[spark] def getExecutorThreadDump(executorId: String): Option[Array[ThreadStackTrace]] = { + try { + if (executorId == SparkContext.DRIVER_IDENTIFIER) { + Some(Utils.getThreadDump()) + } else { + val (host, port) = env.blockManager.master.getActorSystemHostPortForExecutor(executorId).get + val actorRef = AkkaUtils.makeExecutorRef("ExecutorActor", conf, host, port, env.actorSystem) + Some(AkkaUtils.askWithReply[Array[ThreadStackTrace]](TriggerThreadDump, actorRef, + AkkaUtils.numRetries(conf), AkkaUtils.retryWaitMs(conf), AkkaUtils.askTimeout(conf))) + } + } catch { + case e: Exception => + logError(s"Exception getting thread dump from executor $executorId", e) + None + } + } + private[spark] def getLocalProperties: Properties = localProperties.get() private[spark] def setLocalProperties(props: Properties) { @@ -386,7 +465,6 @@ class SparkContext(config: SparkConf) extends Logging { Option(localProperties.get).map(_.getProperty(key)).getOrElse(null) /** Set a human readable description of the current job. */ - @deprecated("use setJobGroup", "0.8.1") def setJobDescription(value: String) { setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value) } @@ -449,12 +527,12 @@ class SparkContext(config: SparkConf) extends Logging { /** Distribute a local Scala collection to form an RDD. * - * @note Parallelize acts lazily. If `seq` is a mutable collection and is - * altered after the call to parallelize and before the first action on the - * RDD, the resultant RDD will reflect the modified collection. Pass a copy of - * the argument to avoid this. + * @note Parallelize acts lazily. If `seq` is a mutable collection and is altered after the call + * to parallelize and before the first action on the RDD, the resultant RDD will reflect the + * modified collection. Pass a copy of the argument to avoid this. */ def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { + assertNotStopped() new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]()) } @@ -470,6 +548,7 @@ class SparkContext(config: SparkConf) extends Logging { * location preferences (hostnames of Spark nodes) for each object. * Create a new partition for each collection item. */ def makeRDD[T: ClassTag](seq: Seq[(T, Seq[String])]): RDD[T] = { + assertNotStopped() val indexToPrefs = seq.zipWithIndex.map(t => (t._2, t._1._2)).toMap new ParallelCollectionRDD[T](this, seq.map(_._1), seq.size, indexToPrefs) } @@ -479,6 +558,7 @@ class SparkContext(config: SparkConf) extends Logging { * Hadoop-supported file system URI, and return it as an RDD of Strings. 
*/ def textFile(path: String, minPartitions: Int = defaultMinPartitions): RDD[String] = { + assertNotStopped() hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], minPartitions).map(pair => pair._2.toString).setName(path) } @@ -512,6 +592,7 @@ class SparkContext(config: SparkConf) extends Logging { */ def wholeTextFiles(path: String, minPartitions: Int = defaultMinPartitions): RDD[(String, String)] = { + assertNotStopped() val job = new NewHadoopJob(hadoopConfiguration) NewFileInputFormat.addInputPath(job, new Path(path)) val updateConf = job.getConfiguration @@ -524,6 +605,82 @@ class SparkContext(config: SparkConf) extends Logging { minPartitions).setName(path) } + + /** + * :: Experimental :: + * + * Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file + * (useful for binary data) + * + * For example, if you have the following files: + * {{{ + * hdfs://a-hdfs-path/part-00000 + * hdfs://a-hdfs-path/part-00001 + * ... + * hdfs://a-hdfs-path/part-nnnnn + * }}} + * + * Do + * `val rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + * + * then `rdd` contains + * {{{ + * (a-hdfs-path/part-00000, its content) + * (a-hdfs-path/part-00001, its content) + * ... + * (a-hdfs-path/part-nnnnn, its content) + * }}} + * + * @param minPartitions A suggestion value of the minimal splitting number for input data. + * + * @note Small files are preferred; very large files may cause bad performance. + */ + @Experimental + def binaryFiles(path: String, minPartitions: Int = defaultMinPartitions): + RDD[(String, PortableDataStream)] = { + assertNotStopped() + val job = new NewHadoopJob(hadoopConfiguration) + NewFileInputFormat.addInputPath(job, new Path(path)) + val updateConf = job.getConfiguration + new BinaryFileRDD( + this, + classOf[StreamInputFormat], + classOf[String], + classOf[PortableDataStream], + updateConf, + minPartitions).setName(path) + } + + /** + * :: Experimental :: + * + * Load data from a flat binary file, assuming the length of each record is constant. + * + * '''Note:''' We ensure that the byte array for each record in the resulting RDD + * has the provided record length. + * + * @param path Directory to the input data files + * @param recordLength The length at which to split the records + * @return An RDD of data with values, represented as byte arrays + */ + @Experimental + def binaryRecords(path: String, recordLength: Int, conf: Configuration = hadoopConfiguration) + : RDD[Array[Byte]] = { + assertNotStopped() + conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) + val br = newAPIHadoopFile[LongWritable, BytesWritable, FixedLengthBinaryInputFormat](path, + classOf[FixedLengthBinaryInputFormat], + classOf[LongWritable], + classOf[BytesWritable], + conf=conf) + val data = br.map { case (k, v) => + val bytes = v.getBytes + assert(bytes.length == recordLength, "Byte array does not have correct length") + bytes + } + data + } + /** * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other * necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable), @@ -536,9 +693,10 @@ class SparkContext(config: SparkConf) extends Logging { * @param minPartitions Minimum number of Hadoop Splits to generate. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. 
- * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def hadoopRDD[K, V]( conf: JobConf, @@ -547,18 +705,20 @@ class SparkContext(config: SparkConf) extends Logging { valueClass: Class[V], minPartitions: Int = defaultMinPartitions ): RDD[(K, V)] = { + assertNotStopped() // Add necessary security credentials to the JobConf before broadcasting it. SparkHadoopUtil.get.addCredentials(conf) new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minPartitions) } /** Get an RDD for a Hadoop file with an arbitrary InputFormat - * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. - * */ + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. + */ def hadoopFile[K, V]( path: String, inputFormatClass: Class[_ <: InputFormat[K, V]], @@ -566,6 +726,7 @@ class SparkContext(config: SparkConf) extends Logging { valueClass: Class[V], minPartitions: Int = defaultMinPartitions ): RDD[(K, V)] = { + assertNotStopped() // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration)) val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path) @@ -588,9 +749,10 @@ class SparkContext(config: SparkConf) extends Logging { * }}} * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def hadoopFile[K, V, F <: InputFormat[K, V]] (path: String, minPartitions: Int) @@ -611,9 +773,10 @@ class SparkContext(config: SparkConf) extends Logging { * }}} * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. 
+ * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def hadoopFile[K, V, F <: InputFormat[K, V]](path: String) (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = @@ -635,9 +798,10 @@ class SparkContext(config: SparkConf) extends Logging { * and extra configuration options to pass to the input format. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]( path: String, @@ -645,6 +809,9 @@ class SparkContext(config: SparkConf) extends Logging { kClass: Class[K], vClass: Class[V], conf: Configuration = hadoopConfiguration): RDD[(K, V)] = { + assertNotStopped() + // The call to new NewHadoopJob automatically adds security credentials to conf, + // so we don't need to explicitly add them ourselves val job = new NewHadoopJob(conf) NewFileInputFormat.addInputPath(job, new Path(path)) val updatedConf = job.getConfiguration @@ -656,30 +823,37 @@ class SparkContext(config: SparkConf) extends Logging { * and extra configuration options to pass to the input format. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]]( conf: Configuration = hadoopConfiguration, fClass: Class[F], kClass: Class[K], vClass: Class[V]): RDD[(K, V)] = { - new NewHadoopRDD(this, fClass, kClass, vClass, conf) + assertNotStopped() + // Add necessary security credentials to the JobConf. Required to access secure HDFS. + val jconf = new JobConf(conf) + SparkHadoopUtil.get.addCredentials(jconf) + new NewHadoopRDD(this, fClass, kClass, vClass, jconf) } /** Get an RDD for a Hadoop SequenceFile with given key and value types. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. 
*/ def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V], minPartitions: Int ): RDD[(K, V)] = { + assertNotStopped() val inputFormatClass = classOf[SequenceFileInputFormat[K, V]] hadoopFile(path, inputFormatClass, keyClass, valueClass, minPartitions) } @@ -687,13 +861,15 @@ class SparkContext(config: SparkConf) extends Logging { /** Get an RDD for a Hadoop SequenceFile with given key and value types. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. * */ - def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V] - ): RDD[(K, V)] = + def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]): RDD[(K, V)] = { + assertNotStopped() sequenceFile(path, keyClass, valueClass, defaultMinPartitions) + } /** * Version of sequenceFile() for types implicitly convertible to Writables through a @@ -712,15 +888,17 @@ class SparkContext(config: SparkConf) extends Logging { * allow it to figure out the Writable class to use in the subclass case. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each - * record, directly caching the returned RDD will create many references to the same object. - * If you plan to directly cache Hadoop writable objects, you should first copy them using - * a `map` function. + * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle + * operation will create many references to the same object. + * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first + * copy them using a `map` function. */ def sequenceFile[K, V] (path: String, minPartitions: Int = defaultMinPartitions) (implicit km: ClassTag[K], vm: ClassTag[V], kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]) : RDD[(K, V)] = { + assertNotStopped() val kc = kcf() val vc = vcf() val format = classOf[SequenceFileInputFormat[Writable, Writable]] @@ -742,6 +920,7 @@ class SparkContext(config: SparkConf) extends Logging { path: String, minPartitions: Int = defaultMinPartitions ): RDD[T] = { + assertNotStopped() sequenceFile(path, classOf[NullWritable], classOf[BytesWritable], minPartitions) .flatMap(x => Utils.deserialize[Array[T]](x._2.getBytes, Utils.getContextOrSparkClassLoader)) } @@ -817,6 +996,13 @@ class SparkContext(config: SparkConf) extends Logging { * The variable will be sent to each cluster only once. 
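The note repeated throughout the Hadoop input methods above is easy to miss in practice, so here is a minimal sketch of the recommended fix: copy the reused Writable objects into immutable values before caching, sorting, or aggregating. The SequenceFile path is an assumption, and `sc` is taken to be an existing SparkContext.

    import org.apache.hadoop.io.{IntWritable, Text}
    import org.apache.spark.SparkContext

    def loadAndCache(sc: SparkContext): Unit = {
      val raw = sc.sequenceFile("hdfs://a-hdfs-path/data.seq", classOf[IntWritable], classOf[Text])

      // The same IntWritable/Text instances are reused for every record read, so caching,
      // sorting, or aggregating `raw` directly would keep many references to one mutated object.
      // Copying into plain Scala values first avoids that.
      val copied = raw.map { case (k, v) => (k.get, v.toString) }
      copied.cache()
    }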
*/ def broadcast[T: ClassTag](value: T): Broadcast[T] = { + assertNotStopped() + if (classOf[RDD[_]].isAssignableFrom(classTag[T].runtimeClass)) { + // This is a warning instead of an exception in order to avoid breaking user programs that + // might have created RDD broadcast variables but not used them: + logWarning("Can not directly broadcast RDDs; instead, call collect() and " + + "broadcast the result (see SPARK-5063)") + } val bc = env.broadcastManager.newBroadcast[T](value, isLocal) val callSite = getCallSite logInfo("Created broadcast " + bc.id + " from " + callSite.shortForm) @@ -837,11 +1023,12 @@ class SparkContext(config: SparkConf) extends Logging { case "local" => "file:" + uri.getPath case _ => path } - addedFiles(key) = System.currentTimeMillis + val timestamp = System.currentTimeMillis + addedFiles(key) = timestamp // Fetch the file locally in case a job is executed using DAGScheduler.runLocally(). Utils.fetchFile(path, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, - hadoopConfiguration) + hadoopConfiguration, timestamp, useCache = false) logInfo("Added file " + path + " at " + key + " with timestamp " + addedFiles(key)) postEnvironmentUpdate() @@ -856,6 +1043,50 @@ class SparkContext(config: SparkConf) extends Logging { listenerBus.addListener(listener) } + /** + * :: DeveloperApi :: + * Request an additional number of executors from the cluster manager. + * This is currently only supported in Yarn mode. Return whether the request is received. + */ + @DeveloperApi + override def requestExecutors(numAdditionalExecutors: Int): Boolean = { + assert(master.contains("yarn") || dynamicAllocationTesting, + "Requesting executors is currently only supported in YARN mode") + schedulerBackend match { + case b: CoarseGrainedSchedulerBackend => + b.requestExecutors(numAdditionalExecutors) + case _ => + logWarning("Requesting executors is only supported in coarse-grained mode") + false + } + } + + /** + * :: DeveloperApi :: + * Request that the cluster manager kill the specified executors. + * This is currently only supported in Yarn mode. Return whether the request is received. + */ + @DeveloperApi + override def killExecutors(executorIds: Seq[String]): Boolean = { + assert(master.contains("yarn") || dynamicAllocationTesting, + "Killing executors is currently only supported in YARN mode") + schedulerBackend match { + case b: CoarseGrainedSchedulerBackend => + b.killExecutors(executorIds) + case _ => + logWarning("Killing executors is only supported in coarse-grained mode") + false + } + } + + /** + * :: DeveloperApi :: + * Request that cluster manager the kill the specified executor. + * This is currently only supported in Yarn mode. Return whether the request is received. + */ + @DeveloperApi + override def killExecutor(executorId: String): Boolean = super.killExecutor(executorId) + /** The version of Spark on which this application is running. */ def version = SPARK_VERSION @@ -864,6 +1095,7 @@ class SparkContext(config: SparkConf) extends Logging { * memory available for caching. 
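The new warning added to broadcast() above (SPARK-5063) points users toward collecting first and broadcasting the result. A minimal sketch of that pattern follows; the lookup data is illustrative and `sc` is assumed to be an existing SparkContext.

    import org.apache.spark.SparkContext

    def broadcastLookup(sc: SparkContext): Unit = {
      val lookupRdd = sc.parallelize(Seq((1, "a"), (2, "b"), (3, "c")))

      // Broadcasting lookupRdd itself would only trigger the warning above;
      // materialize it on the driver and broadcast the resulting map instead.
      val lookup = sc.broadcast(lookupRdd.collect().toMap)

      val resolved = sc.parallelize(1 to 5).map(id => (id, lookup.value.getOrElse(id, "unknown")))
      resolved.collect().foreach(println)
    }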
*/ def getExecutorMemoryStatus: Map[String, (Long, Long)] = { + assertNotStopped() env.blockManager.master.getMemoryStatus.map { case(blockManagerId, mem) => (blockManagerId.host + ":" + blockManagerId.port, mem) } @@ -876,6 +1108,7 @@ class SparkContext(config: SparkConf) extends Logging { */ @DeveloperApi def getRDDStorageInfo: Array[RDDInfo] = { + assertNotStopped() val rddInfos = persistentRdds.values.map(RDDInfo.fromRdd).toArray StorageUtils.updateRddInfo(rddInfos, getExecutorStorageStatus) rddInfos.filter(_.isCached) @@ -893,6 +1126,7 @@ class SparkContext(config: SparkConf) extends Logging { */ @DeveloperApi def getExecutorStorageStatus: Array[StorageStatus] = { + assertNotStopped() env.blockManager.master.getStorageStatus } @@ -902,6 +1136,7 @@ class SparkContext(config: SparkConf) extends Logging { */ @DeveloperApi def getAllPools: Seq[Schedulable] = { + assertNotStopped() // TODO(xiajunluan): We should take nested pools into account taskScheduler.rootPool.schedulableQueue.toSeq } @@ -912,6 +1147,7 @@ class SparkContext(config: SparkConf) extends Logging { */ @DeveloperApi def getPoolForName(pool: String): Option[Schedulable] = { + assertNotStopped() Option(taskScheduler.rootPool.schedulableNameToSchedulable.get(pool)) } @@ -919,6 +1155,7 @@ class SparkContext(config: SparkConf) extends Logging { * Return current scheduling mode */ def getSchedulingMode: SchedulingMode.SchedulingMode = { + assertNotStopped() taskScheduler.schedulingMode } @@ -993,7 +1230,19 @@ class SparkContext(config: SparkConf) extends Logging { null } } else { - env.httpFileServer.addJar(new File(uri.getPath)) + try { + env.httpFileServer.addJar(new File(uri.getPath)) + } catch { + case exc: FileNotFoundException => + logError(s"Jar not found at $path") + null + case e: Exception => + // For now just log an error but allow to go through so spark examples work. + // The spark examples don't really need the jar distributed since its also + // the app jar. + logError("Error adding jar (" + e + "), was the --addJars option used?") + null + } } // A JAR file which exists locally on every worker node case "local" => @@ -1021,27 +1270,28 @@ class SparkContext(config: SparkConf) extends Logging { /** Shut down the SparkContext. */ def stop() { - postApplicationEnd() - ui.foreach(_.stop()) - // Do this only if not stopped already - best case effort. - // prevent NPE if stopped more than once. - val dagSchedulerCopy = dagScheduler - dagScheduler = null - if (dagSchedulerCopy != null) { - env.metricsSystem.report() - metadataCleaner.cancel() - env.actorSystem.stop(heartbeatReceiver) - cleaner.foreach(_.stop()) - dagSchedulerCopy.stop() - taskScheduler = null - // TODO: Cache.stop()? - env.stop() - SparkEnv.set(null) - listenerBus.stop() - eventLogger.foreach(_.stop()) - logInfo("Successfully stopped SparkContext") - } else { - logInfo("SparkContext already stopped") + SparkContext.SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { + postApplicationEnd() + ui.foreach(_.stop()) + if (!stopped) { + stopped = true + env.metricsSystem.report() + metadataCleaner.cancel() + env.actorSystem.stop(heartbeatReceiver) + cleaner.foreach(_.stop()) + dagScheduler.stop() + dagScheduler = null + taskScheduler = null + // TODO: Cache.stop()? 
+ env.stop() + SparkEnv.set(null) + listenerBus.stop() + eventLogger.foreach(_.stop()) + logInfo("Successfully stopped SparkContext") + SparkContext.clearActiveContext() + } else { + logInfo("SparkContext already stopped") + } } } @@ -1104,14 +1354,15 @@ class SparkContext(config: SparkConf) extends Logging { partitions: Seq[Int], allowLocal: Boolean, resultHandler: (Int, U) => Unit) { - if (dagScheduler == null) { - throw new SparkException("SparkContext has been shutdown") + if (stopped) { + throw new IllegalStateException("SparkContext has been shutdown") } val callSite = getCallSite val cleanedFunc = clean(func) logInfo("Starting job: " + callSite.shortForm) dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, allowLocal, resultHandler, localProperties.get) + progressBar.foreach(_.finishAll()) rdd.doCheckpoint() } @@ -1191,6 +1442,7 @@ class SparkContext(config: SparkConf) extends Logging { func: (TaskContext, Iterator[T]) => U, evaluator: ApproximateEvaluator[U, R], timeout: Long): PartialResult[R] = { + assertNotStopped() val callSite = getCallSite logInfo("Starting job: " + callSite.shortForm) val start = System.nanoTime @@ -1213,6 +1465,7 @@ class SparkContext(config: SparkConf) extends Logging { resultHandler: (Int, U) => Unit, resultFunc: => R): SimpleFutureAction[R] = { + assertNotStopped() val cleanF = clean(processPartition) val callSite = getCallSite val waiter = dagScheduler.submitJob( @@ -1231,11 +1484,13 @@ class SparkContext(config: SparkConf) extends Logging { * for more information. */ def cancelJobGroup(groupId: String) { + assertNotStopped() dagScheduler.cancelJobGroup(groupId) } /** Cancel all jobs that have been scheduled or are running. */ def cancelAllJobs() { + assertNotStopped() dagScheduler.cancelAllJobs() } @@ -1282,13 +1537,20 @@ class SparkContext(config: SparkConf) extends Logging { def getCheckpointDir = checkpointDir /** Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD). */ - def defaultParallelism: Int = taskScheduler.defaultParallelism + def defaultParallelism: Int = { + assertNotStopped() + taskScheduler.defaultParallelism + } /** Default min number of partitions for Hadoop RDDs when not given by user */ @deprecated("use defaultMinPartitions", "1.0.0") def defaultMinSplits: Int = math.min(defaultParallelism, 2) - /** Default min number of partitions for Hadoop RDDs when not given by user */ + /** + * Default min number of partitions for Hadoop RDDs when not given by user + * Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2. + * The reasons for this are discussed in https://github.com/mesos/spark/pull/718 + */ def defaultMinPartitions: Int = math.min(defaultParallelism, 2) private val nextShuffleId = new AtomicInteger(0) @@ -1300,6 +1562,58 @@ class SparkContext(config: SparkConf) extends Logging { /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() + /** + * Registers listeners specified in spark.extraListeners, then starts the listener bus. + * This should be called after all internal listeners have been registered with the listener bus + * (e.g. after the web UI and event logging listeners have been registered). 
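A sketch of a listener that the spark.extraListeners mechanism described above (and implemented in the method that follows) could instantiate. Per the constructor rules it enforces, the listener is a top-level class with a single SparkConf argument; the class name and configuration values are assumptions.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

    // Top-level class with a SparkConf constructor, one of the two accepted constructor shapes.
    class JobEndLogger(conf: SparkConf) extends SparkListener {
      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
        println(s"Job ${jobEnd.jobId} finished")
      }
    }

    object ExtraListenerExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setAppName("extra-listener")
          .setMaster("local[2]")
          .set("spark.extraListeners", "JobEndLogger") // use the fully-qualified name in real code
        val sc = new SparkContext(conf)
        sc.parallelize(1 to 10).count()
        sc.stop()
      }
    }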
+ */ + private def setupAndStartListenerBus(): Unit = { + // Use reflection to instantiate listeners specified via `spark.extraListeners` + try { + val listenerClassNames: Seq[String] = + conf.get("spark.extraListeners", "").split(',').map(_.trim).filter(_ != "") + for (className <- listenerClassNames) { + // Use reflection to find the right constructor + val constructors = { + val listenerClass = Class.forName(className) + listenerClass.getConstructors.asInstanceOf[Array[Constructor[_ <: SparkListener]]] + } + val constructorTakingSparkConf = constructors.find { c => + c.getParameterTypes.sameElements(Array(classOf[SparkConf])) + } + lazy val zeroArgumentConstructor = constructors.find { c => + c.getParameterTypes.isEmpty + } + val listener: SparkListener = { + if (constructorTakingSparkConf.isDefined) { + constructorTakingSparkConf.get.newInstance(conf) + } else if (zeroArgumentConstructor.isDefined) { + zeroArgumentConstructor.get.newInstance() + } else { + throw new SparkException( + s"$className did not have a zero-argument constructor or a" + + " single-argument constructor that accepts SparkConf. Note: if the class is" + + " defined inside of another Scala class, then its constructors may accept an" + + " implicit parameter that references the enclosing class; in this case, you must" + + " define the listener as a top-level class in order to prevent this extra" + + " parameter from breaking Spark's ability to find a valid constructor.") + } + } + listenerBus.addListener(listener) + logInfo(s"Registered listener $className") + } + } catch { + case e: Exception => + try { + stop() + } finally { + throw new SparkException(s"Exception when registering SparkListener", e) + } + } + + listenerBus.start() + } + /** Post the application start event */ private def postApplicationStart() { // Note: this code assumes that the task scheduler has been initialized and has contacted @@ -1330,6 +1644,11 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] def cleanup(cleanupTime: Long) { persistentRdds.clearOldValues(cleanupTime) } + + // In order to prevent multiple SparkContexts from being active at the same time, mark this + // context as having finished construction. + // NOTE: this must be placed at the end of the SparkContext constructor. + SparkContext.setActiveContext(this, allowMultipleContexts) } /** @@ -1338,6 +1657,107 @@ class SparkContext(config: SparkConf) extends Logging { */ object SparkContext extends Logging { + /** + * Lock that guards access to global variables that track SparkContext construction. + */ + private val SPARK_CONTEXT_CONSTRUCTOR_LOCK = new Object() + + /** + * The active, fully-constructed SparkContext. If no SparkContext is active, then this is `None`. + * + * Access to this field is guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK + */ + private var activeContext: Option[SparkContext] = None + + /** + * Points to a partially-constructed SparkContext if some thread is in the SparkContext + * constructor, or `None` if no SparkContext is being constructed. + * + * Access to this field is guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK + */ + private var contextBeingConstructed: Option[SparkContext] = None + + /** + * Called to ensure that no other SparkContext is running in this JVM. + * + * Throws an exception if a running context is detected and logs a warning if another thread is + * constructing a SparkContext. 
This warning is necessary because the current locking scheme + * prevents us from reliably distinguishing between cases where another context is being + * constructed and cases where another constructor threw an exception. + */ + private def assertNoOtherContextIsRunning( + sc: SparkContext, + allowMultipleContexts: Boolean): Unit = { + SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { + contextBeingConstructed.foreach { otherContext => + if (otherContext ne sc) { // checks for reference equality + // Since otherContext might point to a partially-constructed context, guard against + // its creationSite field being null: + val otherContextCreationSite = + Option(otherContext.creationSite).map(_.longForm).getOrElse("unknown location") + val warnMsg = "Another SparkContext is being constructed (or threw an exception in its" + + " constructor). This may indicate an error, since only one SparkContext may be" + + " running in this JVM (see SPARK-2243)." + + s" The other SparkContext was created at:\n$otherContextCreationSite" + logWarning(warnMsg) + } + + activeContext.foreach { ctx => + val errMsg = "Only one SparkContext may be running in this JVM (see SPARK-2243)." + + " To ignore this error, set spark.driver.allowMultipleContexts = true. " + + s"The currently running SparkContext was created at:\n${ctx.creationSite.longForm}" + val exception = new SparkException(errMsg) + if (allowMultipleContexts) { + logWarning("Multiple running SparkContexts detected in the same JVM!", exception) + } else { + throw exception + } + } + } + } + } + + /** + * Called at the beginning of the SparkContext constructor to ensure that no SparkContext is + * running. Throws an exception if a running context is detected and logs a warning if another + * thread is constructing a SparkContext. This warning is necessary because the current locking + * scheme prevents us from reliably distinguishing between cases where another context is being + * constructed and cases where another constructor threw an exception. + */ + private[spark] def markPartiallyConstructed( + sc: SparkContext, + allowMultipleContexts: Boolean): Unit = { + SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { + assertNoOtherContextIsRunning(sc, allowMultipleContexts) + contextBeingConstructed = Some(sc) + } + } + + /** + * Called at the end of the SparkContext constructor to ensure that no other SparkContext has + * raced with this constructor and started. + */ + private[spark] def setActiveContext( + sc: SparkContext, + allowMultipleContexts: Boolean): Unit = { + SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { + assertNoOtherContextIsRunning(sc, allowMultipleContexts) + contextBeingConstructed = None + activeContext = Some(sc) + } + } + + /** + * Clears the active SparkContext metadata. This is called by `SparkContext#stop()`. It's + * also called in unit tests to prevent a flood of warnings from test suites that don't / can't + * properly clean up their SparkContexts. 
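A small sketch of how the one-active-context check above behaves from user code: constructing a second context either throws or merely warns depending on spark.driver.allowMultipleContexts, and stop() clears the registration so a new context may be created afterwards. Application names and master strings are illustrative.

    import org.apache.spark.{SparkConf, SparkContext}

    object SingleContextExample {
      def main(args: Array[String]): Unit = {
        val sc1 = new SparkContext(new SparkConf().setAppName("first").setMaster("local[2]"))

        // Constructing a second context now fails with "Only one SparkContext may be running
        // in this JVM (see SPARK-2243)" unless the escape hatch below is set.
        // val sc2 = new SparkContext(new SparkConf()
        //   .setAppName("second")
        //   .setMaster("local[2]")
        //   .set("spark.driver.allowMultipleContexts", "true"))

        sc1.stop() // clears the active-context registration
        val sc3 = new SparkContext(new SparkConf().setAppName("third").setMaster("local[2]"))
        sc3.stop()
      }
    }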
+ */ + private[spark] def clearActiveContext(): Unit = { + SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { + activeContext = None + } + } + private[spark] val SPARK_JOB_DESCRIPTION = "spark.job.description" private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id" @@ -1346,63 +1766,113 @@ object SparkContext extends Logging { private[spark] val SPARK_UNKNOWN_USER = "" - implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] { + private[spark] val DRIVER_IDENTIFIER = "" + + // The following deprecated objects have already been copied to `object AccumulatorParam` to + // make the compiler find them automatically. They are duplicate codes only for backward + // compatibility, please update `object AccumulatorParam` accordingly if you plan to modify the + // following ones. + + @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + + "backward compatibility.", "1.3.0") + object DoubleAccumulatorParam extends AccumulatorParam[Double] { def addInPlace(t1: Double, t2: Double): Double = t1 + t2 def zero(initialValue: Double) = 0.0 } - implicit object IntAccumulatorParam extends AccumulatorParam[Int] { + @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + + "backward compatibility.", "1.3.0") + object IntAccumulatorParam extends AccumulatorParam[Int] { def addInPlace(t1: Int, t2: Int): Int = t1 + t2 def zero(initialValue: Int) = 0 } - implicit object LongAccumulatorParam extends AccumulatorParam[Long] { + @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + + "backward compatibility.", "1.3.0") + object LongAccumulatorParam extends AccumulatorParam[Long] { def addInPlace(t1: Long, t2: Long) = t1 + t2 def zero(initialValue: Long) = 0L } - implicit object FloatAccumulatorParam extends AccumulatorParam[Float] { + @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + + "backward compatibility.", "1.3.0") + object FloatAccumulatorParam extends AccumulatorParam[Float] { def addInPlace(t1: Float, t2: Float) = t1 + t2 def zero(initialValue: Float) = 0f } - // TODO: Add AccumulatorParams for other types, e.g. lists and strings + // The following deprecated functions have already been moved to `object RDD` to + // make the compiler find them automatically. They are still kept here for backward compatibility + // and just call the corresponding functions in `object RDD`. - implicit def rddToPairRDDFunctions[K, V](rdd: RDD[(K, V)]) + @deprecated("Replaced by implicit functions in the RDD companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + def rddToPairRDDFunctions[K, V](rdd: RDD[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) = { - new PairRDDFunctions(rdd) + RDD.rddToPairRDDFunctions(rdd) } - implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = new AsyncRDDActions(rdd) + @deprecated("Replaced by implicit functions in the RDD companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = RDD.rddToAsyncRDDActions(rdd) - implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag]( - rdd: RDD[(K, V)]) = - new SequenceFileRDDFunctions(rdd) + @deprecated("Replaced by implicit functions in the RDD companion object. 
This is " + + "kept here only for backward compatibility.", "1.3.0") + def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag]( + rdd: RDD[(K, V)]) = { + val kf = implicitly[K => Writable] + val vf = implicitly[V => Writable] + // Set the Writable class to null and `SequenceFileRDDFunctions` will use Reflection to get it + implicit val keyWritableFactory = new WritableFactory[K](_ => null, kf) + implicit val valueWritableFactory = new WritableFactory[V](_ => null, vf) + RDD.rddToSequenceFileRDDFunctions(rdd) + } - implicit def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag]( + @deprecated("Replaced by implicit functions in the RDD companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions[K, V, (K, V)](rdd) + RDD.rddToOrderedRDDFunctions(rdd) - implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) + @deprecated("Replaced by implicit functions in the RDD companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = RDD.doubleRDDToDoubleRDDFunctions(rdd) - implicit def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) = - new DoubleRDDFunctions(rdd.map(x => num.toDouble(x))) + @deprecated("Replaced by implicit functions in the RDD companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) = + RDD.numericRDDToDoubleRDDFunctions(rdd) - // Implicit conversions to common Writable types, for saveAsSequenceFile + // The following deprecated functions have already been moved to `object WritableFactory` to + // make the compiler find them automatically. They are still kept here for backward compatibility. - implicit def intToIntWritable(i: Int) = new IntWritable(i) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def intToIntWritable(i: Int): IntWritable = new IntWritable(i) - implicit def longToLongWritable(l: Long) = new LongWritable(l) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def longToLongWritable(l: Long): LongWritable = new LongWritable(l) - implicit def floatToFloatWritable(f: Float) = new FloatWritable(f) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def floatToFloatWritable(f: Float): FloatWritable = new FloatWritable(f) - implicit def doubleToDoubleWritable(d: Double) = new DoubleWritable(d) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def doubleToDoubleWritable(d: Double): DoubleWritable = new DoubleWritable(d) - implicit def boolToBoolWritable (b: Boolean) = new BooleanWritable(b) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. 
This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def boolToBoolWritable (b: Boolean): BooleanWritable = new BooleanWritable(b) - implicit def bytesToBytesWritable (aob: Array[Byte]) = new BytesWritable(aob) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def bytesToBytesWritable (aob: Array[Byte]): BytesWritable = new BytesWritable(aob) - implicit def stringToText(s: String) = new Text(s) + @deprecated("Replaced by implicit functions in the WritableFactory companion object. This is " + + "kept here only for backward compatibility.", "1.3.0") + implicit def stringToText(s: String): Text = new Text(s) private implicit def arrayToArrayWritable[T <% Writable: ClassTag](arr: Traversable[T]) : ArrayWritable = { @@ -1412,40 +1882,49 @@ object SparkContext extends Logging { arr.map(x => anyToWritable(x)).toArray) } - // Helper objects for converting common types to Writable - private def simpleWritableConverter[T, W <: Writable: ClassTag](convert: W => T) - : WritableConverter[T] = { - val wClass = classTag[W].runtimeClass.asInstanceOf[Class[W]] - new WritableConverter[T](_ => wClass, x => convert(x.asInstanceOf[W])) - } + // The following deprecated functions have already been moved to `object WritableConverter` to + // make the compiler find them automatically. They are still kept here for backward compatibility + // and just call the corresponding functions in `object WritableConverter`. - implicit def intWritableConverter(): WritableConverter[Int] = - simpleWritableConverter[Int, IntWritable](_.get) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def intWritableConverter(): WritableConverter[Int] = + WritableConverter.intWritableConverter() - implicit def longWritableConverter(): WritableConverter[Long] = - simpleWritableConverter[Long, LongWritable](_.get) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def longWritableConverter(): WritableConverter[Long] = + WritableConverter.longWritableConverter() - implicit def doubleWritableConverter(): WritableConverter[Double] = - simpleWritableConverter[Double, DoubleWritable](_.get) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def doubleWritableConverter(): WritableConverter[Double] = + WritableConverter.doubleWritableConverter() - implicit def floatWritableConverter(): WritableConverter[Float] = - simpleWritableConverter[Float, FloatWritable](_.get) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def floatWritableConverter(): WritableConverter[Float] = + WritableConverter.floatWritableConverter() - implicit def booleanWritableConverter(): WritableConverter[Boolean] = - simpleWritableConverter[Boolean, BooleanWritable](_.get) + @deprecated("Replaced by implicit functions in WritableConverter. 
This is kept here only for " + + "backward compatibility.", "1.3.0") + def booleanWritableConverter(): WritableConverter[Boolean] = + WritableConverter.booleanWritableConverter() - implicit def bytesWritableConverter(): WritableConverter[Array[Byte]] = { - simpleWritableConverter[Array[Byte], BytesWritable](bw => - // getBytes method returns array which is longer then data to be returned - Arrays.copyOfRange(bw.getBytes, 0, bw.getLength) - ) - } + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def bytesWritableConverter(): WritableConverter[Array[Byte]] = + WritableConverter.bytesWritableConverter() - implicit def stringWritableConverter(): WritableConverter[String] = - simpleWritableConverter[String, Text](_.toString) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def stringWritableConverter(): WritableConverter[String] = + WritableConverter.stringWritableConverter() - implicit def writableWritableConverter[T <: Writable]() = - new WritableConverter[T](_.runtimeClass.asInstanceOf[Class[T]], _.asInstanceOf[T]) + @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + + "backward compatibility.", "1.3.0") + def writableWritableConverter[T <: Writable](): WritableConverter[T] = + WritableConverter.writableWritableConverter() /** * Find the JAR from which a given class was loaded, to make it easy for users to pass @@ -1501,8 +1980,13 @@ object SparkContext extends Logging { res } - /** Creates a task scheduler based on a given master URL. Extracted for testing. */ - private def createTaskScheduler(sc: SparkContext, master: String): TaskScheduler = { + /** + * Create a task scheduler based on a given master URL. + * Return a 2-tuple of the scheduler backend and the task scheduler. + */ + private def createTaskScheduler( + sc: SparkContext, + master: String): (SchedulerBackend, TaskScheduler) = { // Regular expression used for local[N] and local[*] master formats val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r // Regular expression for local[N, maxRetries], used in tests with failing tasks @@ -1524,16 +2008,19 @@ object SparkContext extends Logging { val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) val backend = new LocalBackend(scheduler, 1) scheduler.initialize(backend) - scheduler + (backend, scheduler) case LOCAL_N_REGEX(threads) => def localCpuCount = Runtime.getRuntime.availableProcessors() // local[*] estimates the number of cores on the machine; local[N] uses exactly N threads. 
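For reference, the master strings matched by the local-mode patterns above, as they would be passed from user code; note that a non-positive thread count is now rejected by the check added just below. The application name is an assumption.

    import org.apache.spark.{SparkConf, SparkContext}

    object LocalMasterUrls {
      def main(args: Array[String]): Unit = {
        // "local"      -> a single worker thread
        // "local[4]"   -> exactly four threads
        // "local[*]"   -> one thread per available core
        // "local[2,3]" -> two threads, allowing up to three task failures (test-oriented form)
        val sc = new SparkContext(new SparkConf().setAppName("local-modes").setMaster("local[*]"))
        println(sc.defaultParallelism)
        sc.stop()
      }
    }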
val threadCount = if (threads == "*") localCpuCount else threads.toInt + if (threadCount <= 0) { + throw new SparkException(s"Asked to run locally with $threadCount threads") + } val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) val backend = new LocalBackend(scheduler, threadCount) scheduler.initialize(backend) - scheduler + (backend, scheduler) case LOCAL_N_FAILURES_REGEX(threads, maxFailures) => def localCpuCount = Runtime.getRuntime.availableProcessors() @@ -1543,14 +2030,14 @@ object SparkContext extends Logging { val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true) val backend = new LocalBackend(scheduler, threadCount) scheduler.initialize(backend) - scheduler + (backend, scheduler) case SPARK_REGEX(sparkUrl) => val scheduler = new TaskSchedulerImpl(sc) val masterUrls = sparkUrl.split(",").map("spark://" + _) val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls) scheduler.initialize(backend) - scheduler + (backend, scheduler) case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) => // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang. @@ -1570,7 +2057,7 @@ object SparkContext extends Logging { backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => { localCluster.stop() } - scheduler + (backend, scheduler) case "yarn-standalone" | "yarn-cluster" => if (master == "yarn-standalone") { @@ -1599,12 +2086,12 @@ object SparkContext extends Logging { } } scheduler.initialize(backend) - scheduler + (backend, scheduler) case "yarn-client" => val scheduler = try { val clazz = - Class.forName("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler") + Class.forName("org.apache.spark.scheduler.cluster.YarnScheduler") val cons = clazz.getConstructor(classOf[SparkContext]) cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl] @@ -1626,7 +2113,7 @@ object SparkContext extends Logging { } scheduler.initialize(backend) - scheduler + (backend, scheduler) case mesosUrl @ MESOS_REGEX(_) => MesosNativeLibrary.load() @@ -1639,13 +2126,13 @@ object SparkContext extends Logging { new MesosSchedulerBackend(scheduler, sc, url) } scheduler.initialize(backend) - scheduler + (backend, scheduler) case SIMR_REGEX(simrUrl) => val scheduler = new TaskSchedulerImpl(sc) val backend = new SimrSchedulerBackend(scheduler, sc, simrUrl) scheduler.initialize(backend) - scheduler + (backend, scheduler) case _ => throw new SparkException("Could not parse Master URL: '" + master + "'") @@ -1664,3 +2151,89 @@ private[spark] class WritableConverter[T]( val writableClass: ClassTag[T] => Class[_ <: Writable], val convert: Writable => T) extends Serializable + +object WritableConverter { + + // Helper objects for converting common types to Writable + private[spark] def simpleWritableConverter[T, W <: Writable: ClassTag](convert: W => T) + : WritableConverter[T] = { + val wClass = classTag[W].runtimeClass.asInstanceOf[Class[W]] + new WritableConverter[T](_ => wClass, x => convert(x.asInstanceOf[W])) + } + + // The following implicit functions were in SparkContext before 1.3 and users had to + // `import SparkContext._` to enable them. Now we move them here to make the compiler find + // them automatically. However, we still keep the old functions in SparkContext for backward + // compatibility and forward to the following functions directly. 
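A sketch of what relocating these implicits means for user code: with the companion-object implicits that follow, the converters and pair-RDD functions are resolved automatically, while the deprecated `import SparkContext._` keeps compiling for older code. The file path is an assumption.

    import org.apache.spark.{SparkConf, SparkContext}
    // Pre-1.3 code needed `import org.apache.spark.SparkContext._` to pick up the
    // WritableConverter and PairRDDFunctions implicits; it is no longer required.

    object ImplicitsExample {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("implicits").setMaster("local[2]"))
        // Int/String converters are found via the WritableConverter companion object.
        val rdd = sc.sequenceFile[Int, String]("hdfs://a-hdfs-path/data.seq")
        // Pair functions are found via the RDD companion object.
        println(rdd.groupByKey().count())
        sc.stop()
      }
    }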
+ + implicit def intWritableConverter(): WritableConverter[Int] = + simpleWritableConverter[Int, IntWritable](_.get) + + implicit def longWritableConverter(): WritableConverter[Long] = + simpleWritableConverter[Long, LongWritable](_.get) + + implicit def doubleWritableConverter(): WritableConverter[Double] = + simpleWritableConverter[Double, DoubleWritable](_.get) + + implicit def floatWritableConverter(): WritableConverter[Float] = + simpleWritableConverter[Float, FloatWritable](_.get) + + implicit def booleanWritableConverter(): WritableConverter[Boolean] = + simpleWritableConverter[Boolean, BooleanWritable](_.get) + + implicit def bytesWritableConverter(): WritableConverter[Array[Byte]] = { + simpleWritableConverter[Array[Byte], BytesWritable] { bw => + // getBytes method returns array which is longer then data to be returned + Arrays.copyOfRange(bw.getBytes, 0, bw.getLength) + } + } + + implicit def stringWritableConverter(): WritableConverter[String] = + simpleWritableConverter[String, Text](_.toString) + + implicit def writableWritableConverter[T <: Writable](): WritableConverter[T] = + new WritableConverter[T](_.runtimeClass.asInstanceOf[Class[T]], _.asInstanceOf[T]) +} + +/** + * A class encapsulating how to convert some type T to Writable. It stores both the Writable class + * corresponding to T (e.g. IntWritable for Int) and a function for doing the conversion. + * The Writable class will be used in `SequenceFileRDDFunctions`. + */ +private[spark] class WritableFactory[T]( + val writableClass: ClassTag[T] => Class[_ <: Writable], + val convert: T => Writable) extends Serializable + +object WritableFactory { + + private[spark] def simpleWritableFactory[T: ClassTag, W <: Writable : ClassTag](convert: T => W) + : WritableFactory[T] = { + val writableClass = implicitly[ClassTag[W]].runtimeClass.asInstanceOf[Class[W]] + new WritableFactory[T](_ => writableClass, convert) + } + + implicit def intWritableFactory: WritableFactory[Int] = + simpleWritableFactory(new IntWritable(_)) + + implicit def longWritableFactory: WritableFactory[Long] = + simpleWritableFactory(new LongWritable(_)) + + implicit def floatWritableFactory: WritableFactory[Float] = + simpleWritableFactory(new FloatWritable(_)) + + implicit def doubleWritableFactory: WritableFactory[Double] = + simpleWritableFactory(new DoubleWritable(_)) + + implicit def booleanWritableFactory: WritableFactory[Boolean] = + simpleWritableFactory(new BooleanWritable(_)) + + implicit def bytesWritableFactory: WritableFactory[Array[Byte]] = + simpleWritableFactory(new BytesWritable(_)) + + implicit def stringWritableFactory: WritableFactory[String] = + simpleWritableFactory(new Text(_)) + + implicit def writableWritableFactory[T <: Writable: ClassTag]: WritableFactory[T] = + simpleWritableFactory(w => w) + +} diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index aba713cb4267a..f25db7f8de565 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -32,6 +32,7 @@ import org.apache.spark.api.python.PythonWorkerFactory import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.network.BlockTransferService +import org.apache.spark.network.netty.NettyBlockTransferService import org.apache.spark.network.nio.NioBlockTransferService import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.serializer.Serializer @@ -68,6 +69,7 @@ class 
SparkEnv ( val shuffleMemoryManager: ShuffleMemoryManager, val conf: SparkConf) extends Logging { + private[spark] var isStopped = false private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() // A general, soft-reference map for metadata needed during HadoopRDD split computation @@ -75,6 +77,7 @@ class SparkEnv ( private[spark] val hadoopJobMetadata = new MapMaker().softValues().makeMap[String, Any]() private[spark] def stop() { + isStopped = true pythonWorkers.foreach { case(key, worker) => worker.stop() } Option(httpFileServer).foreach(_.stop()) mapOutputTracker.stop() @@ -142,14 +145,64 @@ object SparkEnv extends Logging { env } - private[spark] def create( + /** + * Create a SparkEnv for the driver. + */ + private[spark] def createDriverEnv( + conf: SparkConf, + isLocal: Boolean, + listenerBus: LiveListenerBus): SparkEnv = { + assert(conf.contains("spark.driver.host"), "spark.driver.host is not set on the driver!") + assert(conf.contains("spark.driver.port"), "spark.driver.port is not set on the driver!") + val hostname = conf.get("spark.driver.host") + val port = conf.get("spark.driver.port").toInt + create( + conf, + SparkContext.DRIVER_IDENTIFIER, + hostname, + port, + isDriver = true, + isLocal = isLocal, + listenerBus = listenerBus + ) + } + + /** + * Create a SparkEnv for an executor. + * In coarse-grained mode, the executor provides an actor system that is already instantiated. + */ + private[spark] def createExecutorEnv( + conf: SparkConf, + executorId: String, + hostname: String, + port: Int, + numCores: Int, + isLocal: Boolean): SparkEnv = { + val env = create( + conf, + executorId, + hostname, + port, + isDriver = false, + isLocal = isLocal, + numUsableCores = numCores + ) + SparkEnv.set(env) + env + } + + /** + * Helper method to create a SparkEnv for a driver or an executor. + */ + private def create( conf: SparkConf, executorId: String, hostname: String, port: Int, isDriver: Boolean, isLocal: Boolean, - listenerBus: LiveListenerBus = null): SparkEnv = { + listenerBus: LiveListenerBus = null, + numUsableCores: Int = 0): SparkEnv = { // Listener bus is only used on the driver if (isDriver) { @@ -157,14 +210,18 @@ object SparkEnv extends Logging { } val securityManager = new SecurityManager(conf) - val actorSystemName = if (isDriver) driverActorSystemName else executorActorSystemName - val (actorSystem, boundPort) = AkkaUtils.createActorSystem( - actorSystemName, hostname, port, conf, securityManager) + + // Create the ActorSystem for Akka and get the port it binds to. + val (actorSystem, boundPort) = { + val actorSystemName = if (isDriver) driverActorSystemName else executorActorSystemName + AkkaUtils.createActorSystem(actorSystemName, hostname, port, conf, securityManager) + } // Figure out which port Akka actually bound to in case the original port is 0 or occupied. - // This is so that we tell the executors the correct port to connect to. 
if (isDriver) { conf.set("spark.driver.port", boundPort.toString) + } else { + conf.set("spark.executor.port", boundPort.toString) } // Create an instance of the class with the given name, possibly initializing it with our conf @@ -231,14 +288,22 @@ object SparkEnv extends Logging { val shuffleMemoryManager = new ShuffleMemoryManager(conf) - val blockTransferService = new NioBlockTransferService(conf, securityManager) + val blockTransferService = + conf.get("spark.shuffle.blockTransferService", "netty").toLowerCase match { + case "netty" => + new NettyBlockTransferService(conf, securityManager, numUsableCores) + case "nio" => + new NioBlockTransferService(conf, securityManager) + } val blockManagerMaster = new BlockManagerMaster(registerOrLookup( "BlockManagerMaster", new BlockManagerMasterActor(isLocal, conf, listenerBus)), conf, isDriver) + // NB: blockManager is not valid until initialize() is called later. val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, - serializer, conf, mapOutputTracker, shuffleManager, blockTransferService) + serializer, conf, mapOutputTracker, shuffleManager, blockTransferService, securityManager, + numUsableCores) val broadcastManager = new BroadcastManager(isDriver, conf, securityManager) @@ -247,7 +312,7 @@ object SparkEnv extends Logging { val httpFileServer = if (isDriver) { val fileServerPort = conf.getInt("spark.fileserver.port", 0) - val server = new HttpFileServer(securityManager, fileServerPort) + val server = new HttpFileServer(conf, securityManager, fileServerPort) server.initialize() conf.set("spark.fileserver.uri", server.serverUri) server @@ -261,6 +326,10 @@ object SparkEnv extends Logging { // Then we can start the metrics system. MetricsSystem.createMetricsSystem("driver", conf, securityManager) } else { + // We need to set the executor ID before the MetricsSystem is created because sources and + // sinks specified in the metrics configuration file will want to incorporate this executor's + // ID into the metrics they report. + conf.set("spark.executor.id", executorId) val ms = MetricsSystem.createMetricsSystem("executor", conf, securityManager) ms.start() ms @@ -270,7 +339,7 @@ object SparkEnv extends Logging { // this is a temporary directory; in distributed mode, this is the executor's current working // directory. val sparkFilesDir: String = if (isDriver) { - Utils.createTempDir().getAbsolutePath + Utils.createTempDir(Utils.getLocalDir(conf), "userFiles").getAbsolutePath } else { "." 
} @@ -330,7 +399,7 @@ object SparkEnv extends Logging { val sparkProperties = (conf.getAll ++ schedulerMode).sorted // System properties that are not java classpaths - val systemProperties = System.getProperties.iterator.toSeq + val systemProperties = Utils.getSystemProperties.toSeq val otherProperties = systemProperties.filter { case (k, _) => k != "java.class.path" && !k.startsWith("spark.") }.sorted diff --git a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index 376e69cd997d5..40237596570de 100644 --- a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.mapred._ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path +import org.apache.spark.mapred.SparkHadoopMapRedUtil import org.apache.spark.rdd.HadoopRDD /** diff --git a/core/src/main/scala/org/apache/spark/SparkSaslClient.scala b/core/src/main/scala/org/apache/spark/SparkSaslClient.scala deleted file mode 100644 index 65003b6ac6a0a..0000000000000 --- a/core/src/main/scala/org/apache/spark/SparkSaslClient.scala +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark - -import java.io.IOException -import javax.security.auth.callback.Callback -import javax.security.auth.callback.CallbackHandler -import javax.security.auth.callback.NameCallback -import javax.security.auth.callback.PasswordCallback -import javax.security.auth.callback.UnsupportedCallbackException -import javax.security.sasl.RealmCallback -import javax.security.sasl.RealmChoiceCallback -import javax.security.sasl.Sasl -import javax.security.sasl.SaslClient -import javax.security.sasl.SaslException - -import scala.collection.JavaConversions.mapAsJavaMap - -/** - * Implements SASL Client logic for Spark - */ -private[spark] class SparkSaslClient(securityMgr: SecurityManager) extends Logging { - - /** - * Used to respond to server's counterpart, SaslServer with SASL tokens - * represented as byte arrays. - * - * The authentication mechanism used here is DIGEST-MD5. This could be changed to be - * configurable in the future. - */ - private var saslClient: SaslClient = Sasl.createSaslClient(Array[String](SparkSaslServer.DIGEST), - null, null, SparkSaslServer.SASL_DEFAULT_REALM, SparkSaslServer.SASL_PROPS, - new SparkSaslClientCallbackHandler(securityMgr)) - - /** - * Used to initiate SASL handshake with server. 
- * @return response to challenge if needed - */ - def firstToken(): Array[Byte] = { - synchronized { - val saslToken: Array[Byte] = - if (saslClient != null && saslClient.hasInitialResponse()) { - logDebug("has initial response") - saslClient.evaluateChallenge(new Array[Byte](0)) - } else { - new Array[Byte](0) - } - saslToken - } - } - - /** - * Determines whether the authentication exchange has completed. - * @return true is complete, otherwise false - */ - def isComplete(): Boolean = { - synchronized { - if (saslClient != null) saslClient.isComplete() else false - } - } - - /** - * Respond to server's SASL token. - * @param saslTokenMessage contains server's SASL token - * @return client's response SASL token - */ - def saslResponse(saslTokenMessage: Array[Byte]): Array[Byte] = { - synchronized { - if (saslClient != null) saslClient.evaluateChallenge(saslTokenMessage) else new Array[Byte](0) - } - } - - /** - * Disposes of any system resources or security-sensitive information the - * SaslClient might be using. - */ - def dispose() { - synchronized { - if (saslClient != null) { - try { - saslClient.dispose() - } catch { - case e: SaslException => // ignored - } finally { - saslClient = null - } - } - } - } - - /** - * Implementation of javax.security.auth.callback.CallbackHandler - * that works with share secrets. - */ - private class SparkSaslClientCallbackHandler(securityMgr: SecurityManager) extends - CallbackHandler { - - private val userName: String = - SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes("utf-8")) - private val secretKey = securityMgr.getSecretKey() - private val userPassword: Array[Char] = SparkSaslServer.encodePassword( - if (secretKey != null) secretKey.getBytes("utf-8") else "".getBytes("utf-8")) - - /** - * Implementation used to respond to SASL request from the server. - * - * @param callbacks objects that indicate what credential information the - * server's SaslServer requires from the client. - */ - override def handle(callbacks: Array[Callback]) { - logDebug("in the sasl client callback handler") - callbacks foreach { - case nc: NameCallback => { - logDebug("handle: SASL client callback: setting username: " + userName) - nc.setName(userName) - } - case pc: PasswordCallback => { - logDebug("handle: SASL client callback: setting userPassword") - pc.setPassword(userPassword) - } - case rc: RealmCallback => { - logDebug("handle: SASL client callback: setting realm: " + rc.getDefaultText()) - rc.setText(rc.getDefaultText()) - } - case cb: RealmChoiceCallback => {} - case cb: Callback => throw - new UnsupportedCallbackException(cb, "handle: Unrecognized SASL client callback") - } - } - } -} diff --git a/core/src/main/scala/org/apache/spark/SparkSaslServer.scala b/core/src/main/scala/org/apache/spark/SparkSaslServer.scala deleted file mode 100644 index f6b0a9132aca4..0000000000000 --- a/core/src/main/scala/org/apache/spark/SparkSaslServer.scala +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark - -import javax.security.auth.callback.Callback -import javax.security.auth.callback.CallbackHandler -import javax.security.auth.callback.NameCallback -import javax.security.auth.callback.PasswordCallback -import javax.security.auth.callback.UnsupportedCallbackException -import javax.security.sasl.AuthorizeCallback -import javax.security.sasl.RealmCallback -import javax.security.sasl.Sasl -import javax.security.sasl.SaslException -import javax.security.sasl.SaslServer -import scala.collection.JavaConversions.mapAsJavaMap -import org.apache.commons.net.util.Base64 - -/** - * Encapsulates SASL server logic - */ -private[spark] class SparkSaslServer(securityMgr: SecurityManager) extends Logging { - - /** - * Actual SASL work done by this object from javax.security.sasl. - */ - private var saslServer: SaslServer = Sasl.createSaslServer(SparkSaslServer.DIGEST, null, - SparkSaslServer.SASL_DEFAULT_REALM, SparkSaslServer.SASL_PROPS, - new SparkSaslDigestCallbackHandler(securityMgr)) - - /** - * Determines whether the authentication exchange has completed. - * @return true is complete, otherwise false - */ - def isComplete(): Boolean = { - synchronized { - if (saslServer != null) saslServer.isComplete() else false - } - } - - /** - * Used to respond to server SASL tokens. - * @param token Server's SASL token - * @return response to send back to the server. - */ - def response(token: Array[Byte]): Array[Byte] = { - synchronized { - if (saslServer != null) saslServer.evaluateResponse(token) else new Array[Byte](0) - } - } - - /** - * Disposes of any system resources or security-sensitive information the - * SaslServer might be using. 
- */ - def dispose() { - synchronized { - if (saslServer != null) { - try { - saslServer.dispose() - } catch { - case e: SaslException => // ignore - } finally { - saslServer = null - } - } - } - } - - /** - * Implementation of javax.security.auth.callback.CallbackHandler - * for SASL DIGEST-MD5 mechanism - */ - private class SparkSaslDigestCallbackHandler(securityMgr: SecurityManager) - extends CallbackHandler { - - private val userName: String = - SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes("utf-8")) - - override def handle(callbacks: Array[Callback]) { - logDebug("In the sasl server callback handler") - callbacks foreach { - case nc: NameCallback => { - logDebug("handle: SASL server callback: setting username") - nc.setName(userName) - } - case pc: PasswordCallback => { - logDebug("handle: SASL server callback: setting userPassword") - val password: Array[Char] = - SparkSaslServer.encodePassword(securityMgr.getSecretKey().getBytes("utf-8")) - pc.setPassword(password) - } - case rc: RealmCallback => { - logDebug("handle: SASL server callback: setting realm: " + rc.getDefaultText()) - rc.setText(rc.getDefaultText()) - } - case ac: AuthorizeCallback => { - val authid = ac.getAuthenticationID() - val authzid = ac.getAuthorizationID() - if (authid.equals(authzid)) { - logDebug("set auth to true") - ac.setAuthorized(true) - } else { - logDebug("set auth to false") - ac.setAuthorized(false) - } - if (ac.isAuthorized()) { - logDebug("sasl server is authorized") - ac.setAuthorizedID(authzid) - } - } - case cb: Callback => throw - new UnsupportedCallbackException(cb, "handle: Unrecognized SASL DIGEST-MD5 Callback") - } - } - } -} - -private[spark] object SparkSaslServer { - - /** - * This is passed as the server name when creating the sasl client/server. - * This could be changed to be configurable in the future. - */ - val SASL_DEFAULT_REALM = "default" - - /** - * The authentication mechanism used here is DIGEST-MD5. This could be changed to be - * configurable in the future. - */ - val DIGEST = "DIGEST-MD5" - - /** - * The quality of protection is just "auth". This means that we are doing - * authentication only, we are not supporting integrity or privacy protection of the - * communication channel after authentication. This could be changed to be configurable - * in the future. - */ - val SASL_PROPS = Map(Sasl.QOP -> "auth", Sasl.SERVER_AUTH ->"true") - - /** - * Encode a byte[] identifier as a Base64-encoded string. - * - * @param identifier identifier to encode - * @return Base64-encoded string - */ - def encodeIdentifier(identifier: Array[Byte]): String = { - new String(Base64.encodeBase64(identifier), "utf-8") - } - - /** - * Encode a password as a base64-encoded char[] array. - * @param password as a byte array. - * @return password as a char array. - */ - def encodePassword(password: Array[Byte]): Array[Char] = { - new String(Base64.encodeBase64(password), "utf-8").toCharArray() - } -} - diff --git a/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala b/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala new file mode 100644 index 0000000000000..edbdda8a0bcb6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/SparkStatusTracker.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +/** + * Low-level status reporting APIs for monitoring job and stage progress. + * + * These APIs intentionally provide very weak consistency semantics; consumers of these APIs should + * be prepared to handle empty / missing information. For example, a job's stage ids may be known + * but the status API may not have any information about the details of those stages, so + * `getStageInfo` could potentially return `None` for a valid stage id. + * + * To limit memory usage, these APIs only provide information on recent jobs / stages. These APIs + * will provide information for the last `spark.ui.retainedStages` stages and + * `spark.ui.retainedJobs` jobs. + * + * NOTE: this class's constructor should be considered private and may be subject to change. + */ +class SparkStatusTracker private[spark] (sc: SparkContext) { + + private val jobProgressListener = sc.jobProgressListener + + /** + * Return a list of all known jobs in a particular job group. If `jobGroup` is `null`, then + * returns all known jobs that are not associated with a job group. + * + * The returned list may contain running, failed, and completed jobs, and may vary across + * invocations of this method. This method does not guarantee the order of the elements in + * its result. + */ + def getJobIdsForGroup(jobGroup: String): Array[Int] = { + jobProgressListener.synchronized { + val jobData = jobProgressListener.jobIdToData.valuesIterator + jobData.filter(_.jobGroup.orNull == jobGroup).map(_.jobId).toArray + } + } + + /** + * Returns an array containing the ids of all active stages. + * + * This method does not guarantee the order of the elements in its result. + */ + def getActiveStageIds(): Array[Int] = { + jobProgressListener.synchronized { + jobProgressListener.activeStages.values.map(_.stageId).toArray + } + } + + /** + * Returns an array containing the ids of all active jobs. + * + * This method does not guarantee the order of the elements in its result. + */ + def getActiveJobIds(): Array[Int] = { + jobProgressListener.synchronized { + jobProgressListener.activeJobs.values.map(_.jobId).toArray + } + } + + /** + * Returns job information, or `None` if the job info could not be found or was garbage collected. + */ + def getJobInfo(jobId: Int): Option[SparkJobInfo] = { + jobProgressListener.synchronized { + jobProgressListener.jobIdToData.get(jobId).map { data => + new SparkJobInfoImpl(jobId, data.stageIds.toArray, data.status) + } + } + } + + /** + * Returns stage information, or `None` if the stage info could not be found or was + * garbage collected. 
+ */ + def getStageInfo(stageId: Int): Option[SparkStageInfo] = { + jobProgressListener.synchronized { + for ( + info <- jobProgressListener.stageIdToInfo.get(stageId); + data <- jobProgressListener.stageIdToData.get((stageId, info.attemptId)) + ) yield { + new SparkStageInfoImpl( + stageId, + info.attemptId, + info.submissionTime.getOrElse(0), + info.name, + info.numTasks, + data.numActiveTasks, + data.numCompleteTasks, + data.numFailedTasks) + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala b/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala new file mode 100644 index 0000000000000..e5c7c8d0db578 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/StatusAPIImpl.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +private class SparkJobInfoImpl ( + val jobId: Int, + val stageIds: Array[Int], + val status: JobExecutionStatus) + extends SparkJobInfo + +private class SparkStageInfoImpl( + val stageId: Int, + val currentAttemptId: Int, + val submissionTime: Long, + val name: String, + val numTasks: Int, + val numActiveTasks: Int, + val numCompletedTasks: Int, + val numFailedTasks: Int) + extends SparkStageInfo diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala new file mode 100644 index 0000000000000..7d7fe1a446313 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import java.io.Serializable + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.util.TaskCompletionListener + + +object TaskContext { + /** + * Return the currently active TaskContext. This can be called inside of + * user functions to access contextual information about running tasks. 
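As a rough sketch of how the status APIs above can be consumed (assuming an already-running `SparkContext` named `sc`); because of the weak consistency semantics, every lookup must tolerate `None`:

{{{
// Hedged sketch: polling job/stage progress through SparkContext.statusTracker.
val tracker = sc.statusTracker
for (jobId <- tracker.getActiveJobIds()) {
  tracker.getJobInfo(jobId) match {
    case Some(job) =>
      println(s"job $jobId: ${job.status}, stages = ${job.stageIds.mkString(",")}")
      for {
        stageId <- job.stageIds
        stage   <- tracker.getStageInfo(stageId)  // may be None under weak consistency
      } {
        println(s"  stage $stageId: ${stage.numCompletedTasks}/${stage.numTasks} tasks complete")
      }
    case None => // info may already have been evicted; callers must tolerate this
  }
}
}}}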
+ */ + def get(): TaskContext = taskContext.get + + private val taskContext: ThreadLocal[TaskContext] = new ThreadLocal[TaskContext] + + // Note: protected[spark] instead of private[spark] to prevent the following two from + // showing up in JavaDoc. + /** + * Set the thread local TaskContext. Internal to Spark. + */ + protected[spark] def setTaskContext(tc: TaskContext): Unit = taskContext.set(tc) + + /** + * Unset the thread local TaskContext. Internal to Spark. + */ + protected[spark] def unset(): Unit = taskContext.remove() +} + + +/** + * Contextual information about a task which can be read or mutated during + * execution. To access the TaskContext for a running task, use: + * {{{ + * org.apache.spark.TaskContext.get() + * }}} + */ +abstract class TaskContext extends Serializable { + // Note: TaskContext must NOT define a get method. Otherwise it will prevent the Scala compiler + // from generating a static get method (based on the companion object's get method). + + // Note: Update JavaTaskContextCompileCheck when new methods are added to this class. + + // Note: getters in this class are defined with parentheses to maintain backward compatibility. + + /** + * Returns true if the task has completed. + */ + def isCompleted(): Boolean + + /** + * Returns true if the task has been killed. + */ + def isInterrupted(): Boolean + + @deprecated("use isRunningLocally", "1.2.0") + def runningLocally(): Boolean + + /** + * Returns true if the task is running locally in the driver program. + * @return + */ + def isRunningLocally(): Boolean + + /** + * Adds a (Java friendly) listener to be executed on task completion. + * This will be called in all situation - success, failure, or cancellation. + * An example use is for HadoopRDD to register a callback to close the input stream. + */ + def addTaskCompletionListener(listener: TaskCompletionListener): TaskContext + + /** + * Adds a listener in the form of a Scala closure to be executed on task completion. + * This will be called in all situations - success, failure, or cancellation. + * An example use is for HadoopRDD to register a callback to close the input stream. + */ + def addTaskCompletionListener(f: (TaskContext) => Unit): TaskContext + + /** + * Adds a callback function to be executed on task completion. An example use + * is for HadoopRDD to register a callback to close the input stream. + * Will be called in any situation - success, failure, or cancellation. + * + * @param f Callback function. + */ + @deprecated("use addTaskCompletionListener", "1.2.0") + def addOnCompleteCallback(f: () => Unit) + + /** + * The ID of the stage that this task belong to. + */ + def stageId(): Int + + /** + * The ID of the RDD partition that is computed by this task. + */ + def partitionId(): Int + + /** + * How many times this task has been attempted. The first task attempt will be assigned + * attemptNumber = 0, and subsequent attempts will have increasing attempt numbers. + */ + def attemptNumber(): Int + + @deprecated("use attemptNumber", "1.3.0") + def attemptId(): Long + + /** + * An ID that is unique to this task attempt (within the same SparkContext, no two task attempts + * will share the same attempt ID). This is roughly equivalent to Hadoop's TaskAttemptID. 
+ */ + def taskAttemptId(): Long + + /** ::DeveloperApi:: */ + @DeveloperApi + def taskMetrics(): TaskMetrics +} diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index afd2b85d33a77..337c8e4ebebcd 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -22,14 +22,19 @@ import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerExce import scala.collection.mutable.ArrayBuffer -private[spark] class TaskContextImpl(val stageId: Int, +private[spark] class TaskContextImpl( + val stageId: Int, val partitionId: Int, - val attemptId: Long, + override val taskAttemptId: Long, + override val attemptNumber: Int, val runningLocally: Boolean = false, val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { + // For backwards-compatibility; this method is now deprecated as of 1.3.0. + override def attemptId(): Long = taskAttemptId + // List of callback functions to execute when the task completes. @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener] @@ -82,10 +87,10 @@ private[spark] class TaskContextImpl(val stageId: Int, interrupted = true } - override def isCompleted: Boolean = completed + override def isCompleted(): Boolean = completed - override def isRunningLocally: Boolean = runningLocally + override def isRunningLocally(): Boolean = runningLocally - override def isInterrupted: Boolean = interrupted + override def isInterrupted(): Boolean = interrupted } diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 8f0c5e78416c2..af5fd8e0ac00c 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -69,11 +69,13 @@ case class FetchFailed( bmAddress: BlockManagerId, // Note that bmAddress can be null shuffleId: Int, mapId: Int, - reduceId: Int) + reduceId: Int, + message: String) extends TaskFailedReason { override def toErrorString: String = { val bmAddressString = if (bmAddress == null) "null" else bmAddress.toString - s"FetchFailed($bmAddressString, shuffleId=$shuffleId, mapId=$mapId, reduceId=$reduceId)" + s"FetchFailed($bmAddressString, shuffleId=$shuffleId, mapId=$mapId, reduceId=$reduceId, " + + s"message=\n$message\n)" } } @@ -81,15 +83,48 @@ case class FetchFailed( * :: DeveloperApi :: * Task failed due to a runtime exception. This is the most common failure case and also captures * user program exceptions. + * + * `stackTrace` contains the stack trace of the exception itself. It still exists for backward + * compatibility. It's better to use `this(e: Throwable, metrics: Option[TaskMetrics])` to + * create `ExceptionFailure` as it will handle the backward compatibility properly. 
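A short, hedged sketch of the `TaskContext` API described above, run from inside a task (assumes an existing `SparkContext` named `sc`):

{{{
import org.apache.spark.TaskContext

val rdd = sc.parallelize(1 to 100, numSlices = 4)
rdd.foreachPartition { iter =>
  val ctx = TaskContext.get()
  // attemptNumber() starts at 0 and grows on retries; taskAttemptId() is
  // unique across all task attempts within this SparkContext.
  println(s"partition=${ctx.partitionId()} attempt=${ctx.attemptNumber()}")
  ctx.addTaskCompletionListener { _ =>
    // runs on success, failure, or cancellation -- e.g. close an input stream here
    ()
  }
  iter.foreach(_ => ())
}
}}}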
+ * + * `fullStackTrace` is a better representation of the stack trace because it contains the whole + * stack trace including the exception and its causes */ @DeveloperApi case class ExceptionFailure( className: String, description: String, stackTrace: Array[StackTraceElement], + fullStackTrace: String, metrics: Option[TaskMetrics]) extends TaskFailedReason { - override def toErrorString: String = Utils.exceptionString(className, description, stackTrace) + + private[spark] def this(e: Throwable, metrics: Option[TaskMetrics]) { + this(e.getClass.getName, e.getMessage, e.getStackTrace, Utils.exceptionString(e), metrics) + } + + override def toErrorString: String = + if (fullStackTrace == null) { + // fullStackTrace is added in 1.2.0 + // If fullStackTrace is null, use the old error string for backward compatibility + exceptionString(className, description, stackTrace) + } else { + fullStackTrace + } + + /** + * Return a nice string representation of the exception, including the stack trace. + * Note: It does not include the exception's causes, and is only used for backward compatibility. + */ + private def exceptionString( + className: String, + description: String, + stackTrace: Array[StackTraceElement]): String = { + val desc = if (description == null) "" else description + val st = if (stackTrace == null) "" else stackTrace.map(" " + _).mkString("\n") + s"$className: $desc\n$st" + } } /** @@ -117,8 +152,8 @@ case object TaskKilled extends TaskFailedReason { * the task crashed the JVM. */ @DeveloperApi -case object ExecutorLostFailure extends TaskFailedReason { - override def toErrorString: String = "ExecutorLostFailure (executor lost)" +case class ExecutorLostFailure(execId: String) extends TaskFailedReason { + override def toErrorString: String = s"ExecutorLostFailure (executor ${execId} lost)" } /** diff --git a/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala b/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala new file mode 100644 index 0000000000000..9df61062e1f85 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/TaskNotSerializableException.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.annotation.DeveloperApi + +/** + * Exception thrown when a task cannot be serialized. 
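A hedged sketch of how a listener might inspect the richer failure reasons introduced above (`FetchFailed.message`, `ExceptionFailure.fullStackTrace`, `ExecutorLostFailure.execId`); registering the listener via `sc.addSparkListener` is assumed:

{{{
import org.apache.spark.{ExceptionFailure, ExecutorLostFailure, FetchFailed, Success}
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

class FailureLogger extends SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = taskEnd.reason match {
    case Success => // task finished normally
    case e: ExceptionFailure =>
      // fullStackTrace includes the causes; may be null for pre-1.2 event logs
      println(e.toErrorString)
    case ExecutorLostFailure(execId) => println(s"lost executor $execId")
    case f: FetchFailed => println(s"fetch failed: ${f.message}")
    case other => println(other)
  }
}
}}}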
+ */ +private[spark] class TaskNotSerializableException(error: Throwable) extends Exception(error) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index e72826dc25f41..34078142f5385 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -23,8 +23,8 @@ import java.util.jar.{JarEntry, JarOutputStream} import scala.collection.JavaConversions._ +import com.google.common.io.{ByteStreams, Files} import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} -import com.google.common.io.Files import org.apache.spark.util.Utils @@ -64,12 +64,7 @@ private[spark] object TestUtils { jarStream.putNextEntry(jarEntry) val in = new FileInputStream(file) - val buffer = new Array[Byte](10240) - var nRead = 0 - while (nRead <= 0) { - nRead = in.read(buffer, 0, buffer.length) - jarStream.write(buffer, 0, nRead) - } + ByteStreams.copy(in, jarStream) in.close() } jarStream.close() diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index a6123bd108c11..8e8f7f6c4fda2 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -114,7 +114,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, Ja * Return an RDD with the elements from `this` that are not in `other`. * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting - * RDD will be <= us. + * RDD will be <= us. */ def subtract(other: JavaDoubleRDD): JavaDoubleRDD = fromRDD(srdd.subtract(other)) @@ -233,11 +233,11 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, Ja * to the left except for the last which is closed * e.g. for the array * [1,10,20,50] the buckets are [1,10) [10,20) [20,50] - * e.g 1<=x<10 , 10<=x<20, 20<=x<50 + * e.g 1<=x<10 , 10<=x<20, 20<=x<50 * And on the input of 1 and 50 we would have a histogram of 1,0,0 * * Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched - * from an O(log n) inseration to O(1) per element. (where n = # buckets) if you set evenBuckets + * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets * to true. * buckets must be sorted and not contain any duplicates. 
* buckets array must be at least two elements diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index c38b96528d037..7af3538262fd6 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -32,13 +32,13 @@ import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.spark.{HashPartitioner, Partitioner} import org.apache.spark.Partitioner._ -import org.apache.spark.SparkContext.rddToPairRDDFunctions import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, PairFunction} import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.{OrderedRDDFunctions, RDD} +import org.apache.spark.rdd.RDD.rddToPairRDDFunctions import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -392,7 +392,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Return an RDD with the elements from `this` that are not in `other`. * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting - * RDD will be <= us. + * RDD will be <= us. */ def subtract(other: JavaPairRDD[K, V]): JavaPairRDD[K, V] = fromRDD(rdd.subtract(other)) @@ -413,7 +413,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Return an RDD with the pairs from `this` whose keys are not in `other`. * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting - * RDD will be <= us. + * RDD will be <= us. */ def subtractByKey[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, V] = { implicit val ctag: ClassTag[W] = fakeClassTag diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index efb8978f7ce12..0f91c942ecd50 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -28,7 +28,6 @@ import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaPairRDD._ import org.apache.spark.api.java.JavaSparkContext.fakeClassTag @@ -39,6 +38,10 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils +/** + * Defines operations common to several Java RDD implementations. + * Note that this trait is not intended to be implemented by user code. + */ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def wrapRDD(rdd: RDD[T]): This @@ -212,8 +215,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. 
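For the histogram bucket semantics documented above, a hedged sketch using the equivalent Scala double-RDD API (assumes `sc`):

{{{
import org.apache.spark.SparkContext._   // double RDD functions (pre-1.3 style import)

// Buckets [1,10) [10,20) [20,50]; only the last bucket is closed on the right.
val data   = sc.parallelize(Seq(1.0, 12.0, 25.0))
val counts = data.histogram(Array(1.0, 10.0, 20.0, 50.0), evenBuckets = false)
// counts: Array[Long] = Array(1, 1, 1)
}}}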
*/ - def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JIterable[T]] = { - implicit val ctagK: ClassTag[K] = fakeClassTag + def groupBy[U](f: JFunction[T, U]): JavaPairRDD[U, JIterable[T]] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctagK: ClassTag[U] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(fakeClassTag))) } @@ -222,10 +226,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. */ - def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JIterable[T]] = { - implicit val ctagK: ClassTag[K] = fakeClassTag + def groupBy[U](f: JFunction[T, U], numPartitions: Int): JavaPairRDD[U, JIterable[T]] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctagK: ClassTag[U] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag - JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[K]))) + JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[U]))) } /** @@ -343,6 +348,19 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { */ def reduce(f: JFunction2[T, T, T]): T = rdd.reduce(f) + /** + * Reduces the elements of this RDD in a multi-level tree pattern. + * + * @param depth suggested depth of the tree + * @see [[org.apache.spark.api.java.JavaRDDLike#reduce]] + */ + def treeReduce(f: JFunction2[T, T, T], depth: Int): T = rdd.treeReduce(f, depth) + + /** + * [[org.apache.spark.api.java.JavaRDDLike#treeReduce]] with suggested depth 2. + */ + def treeReduce(f: JFunction2[T, T, T]): T = treeReduce(f, 2) + /** * Aggregate the elements of each partition, and then the results for all the partitions, using a * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to @@ -364,6 +382,30 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { combOp: JFunction2[U, U, U]): U = rdd.aggregate(zeroValue)(seqOp, combOp)(fakeClassTag[U]) + /** + * Aggregates the elements of this RDD in a multi-level tree pattern. + * + * @param depth suggested depth of the tree + * @see [[org.apache.spark.api.java.JavaRDDLike#aggregate]] + */ + def treeAggregate[U]( + zeroValue: U, + seqOp: JFunction2[U, T, U], + combOp: JFunction2[U, U, U], + depth: Int): U = { + rdd.treeAggregate(zeroValue)(seqOp, combOp, depth)(fakeClassTag[U]) + } + + /** + * [[org.apache.spark.api.java.JavaRDDLike#treeAggregate]] with suggested depth 2. + */ + def treeAggregate[U]( + zeroValue: U, + seqOp: JFunction2[U, T, U], + combOp: JFunction2[U, U, U]): U = { + treeAggregate(zeroValue, seqOp, combOp, 2) + } + /** * Return the number of elements in the RDD. */ @@ -434,6 +476,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { */ def first(): T = rdd.first() + /** + * @return true if and only if the RDD contains no elements at all. Note that an RDD + * may be empty even when it has at least 1 partition. + */ + def isEmpty(): Boolean = rdd.isEmpty() + /** * Save this RDD as a text file, using string representations of elements. */ @@ -459,8 +507,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Creates tuples of the elements in this RDD by applying `f`. 
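A hedged sketch of the tree-aggregation pattern exposed above, using the Scala RDD counterparts that these Java wrappers delegate to (assumes `sc`):

{{{
val values = sc.parallelize(1 to 1000000, numSlices = 64)

val sum = values.treeAggregate(0L)(
  (acc, v) => acc + v,   // seqOp: fold values into a partition-local sum
  (a, b) => a + b,       // combOp: merge partial sums pairwise
  depth = 2)             // suggested tree depth

val sum2 = values.treeReduce(_ + _)   // uses the suggested default depth of 2
}}}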
*/ - def keyBy[K](f: JFunction[T, K]): JavaPairRDD[K, T] = { - implicit val ctag: ClassTag[K] = fakeClassTag + def keyBy[U](f: JFunction[T, U]): JavaPairRDD[U, T] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctag: ClassTag[U] = fakeClassTag JavaPairRDD.fromRDD(rdd.keyBy(f)) } @@ -493,9 +542,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { } /** - * Returns the top K elements from this RDD as defined by + * Returns the top k (largest) elements from this RDD as defined by * the specified Comparator[T]. - * @param num the number of top elements to return + * @param num k, the number of top elements to return * @param comp the comparator that defines the order * @return an array of top elements */ @@ -507,9 +556,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { } /** - * Returns the top K elements from this RDD using the + * Returns the top k (largest) elements from this RDD using the * natural ordering for T. - * @param num the number of top elements to return + * @param num k, the number of top elements to return * @return an array of top elements */ def top(num: Int): JList[T] = { @@ -518,9 +567,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { } /** - * Returns the first K elements from this RDD as defined by + * Returns the first k (smallest) elements from this RDD as defined by * the specified Comparator[T] and maintains the order. - * @param num the number of top elements to return + * @param num k, the number of elements to return * @param comp the comparator that defines the order * @return an array of top elements */ @@ -552,9 +601,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { } /** - * Returns the first K elements from this RDD using the + * Returns the first k (smallest) elements from this RDD using the * natural ordering for T while maintain the order. - * @param num the number of top elements to return + * @param num k, the number of top elements to return * @return an array of top elements */ def takeOrdered(num: Int): JList[T] = { diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 791d853a015a1..97f5c9f257e09 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -28,11 +28,13 @@ import scala.reflect.ClassTag import com.google.common.base.Optional import org.apache.hadoop.conf.Configuration +import org.apache.spark.input.PortableDataStream import org.apache.hadoop.mapred.{InputFormat, JobConf} import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.spark._ -import org.apache.spark.SparkContext.{DoubleAccumulatorParam, IntAccumulatorParam} +import org.apache.spark.AccumulatorParam._ +import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD} @@ -40,6 +42,9 @@ import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD} /** * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns * [[org.apache.spark.api.java.JavaRDD]]s and works with Java collections instead of Scala ones. + * + * Only one SparkContext may be active per JVM. 
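A quick, hedged illustration of the k-largest / k-smallest semantics clarified above:

{{{
val xs = sc.parallelize(Seq(10, 4, 2, 12, 3))
xs.top(2)          // Array(12, 10)  -- the k largest, in descending order
xs.takeOrdered(2)  // Array(2, 3)    -- the k smallest, in ascending order
}}}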
You must `stop()` the active SparkContext before + * creating a new one. This limitation may eventually be removed; see SPARK-2243 for more details. */ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround with Closeable { @@ -103,6 +108,8 @@ class JavaSparkContext(val sc: SparkContext) private[spark] val env = sc.env + def statusTracker = new JavaSparkStatusTracker(sc) + def isLocal: java.lang.Boolean = sc.isLocal def sparkUser: String = sc.sparkUser @@ -183,6 +190,8 @@ class JavaSparkContext(val sc: SparkContext) def textFile(path: String, minPartitions: Int): JavaRDD[String] = sc.textFile(path, minPartitions) + + /** * Read a directory of text files from HDFS, a local file system (available on all nodes), or any * Hadoop-supported file system URI. Each file is read as a single record and returned in a @@ -196,7 +205,10 @@ class JavaSparkContext(val sc: SparkContext) * hdfs://a-hdfs-path/part-nnnnn * }}} * - * Do `JavaPairRDD rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")`, + * Do + * {{{ + * JavaPairRDD rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path") + * }}} * *

then `rdd` contains * {{{ @@ -223,6 +235,84 @@ class JavaSparkContext(val sc: SparkContext) def wholeTextFiles(path: String): JavaPairRDD[String, String] = new JavaPairRDD(sc.wholeTextFiles(path)) + /** + * Read a directory of binary files from HDFS, a local file system (available on all nodes), + * or any Hadoop-supported file system URI as a byte array. Each file is read as a single + * record and returned in a key-value pair, where the key is the path of each file, + * the value is the content of each file. + * + * For example, if you have the following files: + * {{{ + * hdfs://a-hdfs-path/part-00000 + * hdfs://a-hdfs-path/part-00001 + * ... + * hdfs://a-hdfs-path/part-nnnnn + * }}} + * + * Do + * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + * + * then `rdd` contains + * {{{ + * (a-hdfs-path/part-00000, its content) + * (a-hdfs-path/part-00001, its content) + * ... + * (a-hdfs-path/part-nnnnn, its content) + * }}} + * + * @note Small files are preferred; very large files but may cause bad performance. + * + * @param minPartitions A suggestion value of the minimal splitting number for input data. + */ + def binaryFiles(path: String, minPartitions: Int): JavaPairRDD[String, PortableDataStream] = + new JavaPairRDD(sc.binaryFiles(path, minPartitions)) + + /** + * :: Experimental :: + * + * Read a directory of binary files from HDFS, a local file system (available on all nodes), + * or any Hadoop-supported file system URI as a byte array. Each file is read as a single + * record and returned in a key-value pair, where the key is the path of each file, + * the value is the content of each file. + * + * For example, if you have the following files: + * {{{ + * hdfs://a-hdfs-path/part-00000 + * hdfs://a-hdfs-path/part-00001 + * ... + * hdfs://a-hdfs-path/part-nnnnn + * }}} + * + * Do + * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + * + * then `rdd` contains + * {{{ + * (a-hdfs-path/part-00000, its content) + * (a-hdfs-path/part-00001, its content) + * ... + * (a-hdfs-path/part-nnnnn, its content) + * }}} + * + * @note Small files are preferred; very large files but may cause bad performance. + */ + @Experimental + def binaryFiles(path: String): JavaPairRDD[String, PortableDataStream] = + new JavaPairRDD(sc.binaryFiles(path, defaultMinPartitions)) + + /** + * :: Experimental :: + * + * Load data from a flat binary file, assuming the length of each record is constant. + * + * @param path Directory to the input data files + * @return An RDD of data with values, represented as byte arrays + */ + @Experimental + def binaryRecords(path: String, recordLength: Int): JavaRDD[Array[Byte]] = { + new JavaRDD(sc.binaryRecords(path, recordLength)) + } + /** Get an RDD for a Hadoop SequenceFile with given key and value types. * * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala new file mode 100644 index 0000000000000..3300cad9efbab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
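A hedged sketch of the binary input APIs surfaced above, via their Scala `SparkContext` equivalents (the paths and record length are illustrative):

{{{
import org.apache.spark.input.PortableDataStream

val blobs = sc.binaryFiles("hdfs://a-hdfs-path", minPartitions = 8)
val sizes = blobs.map { case (path, stream: PortableDataStream) =>
  (path, stream.toArray().length)   // toArray() reads the whole file into memory
}

// Fixed-length records, e.g. hypothetical 512-byte blocks:
val records = sc.binaryRecords("hdfs://a-hdfs-path", recordLength = 512)
}}}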
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java + +import org.apache.spark.{SparkStageInfo, SparkJobInfo, SparkContext} + +/** + * Low-level status reporting APIs for monitoring job and stage progress. + * + * These APIs intentionally provide very weak consistency semantics; consumers of these APIs should + * be prepared to handle empty / missing information. For example, a job's stage ids may be known + * but the status API may not have any information about the details of those stages, so + * `getStageInfo` could potentially return `null` for a valid stage id. + * + * To limit memory usage, these APIs only provide information on recent jobs / stages. These APIs + * will provide information for the last `spark.ui.retainedStages` stages and + * `spark.ui.retainedJobs` jobs. + * + * NOTE: this class's constructor should be considered private and may be subject to change. + */ +class JavaSparkStatusTracker private[spark] (sc: SparkContext) { + + /** + * Return a list of all known jobs in a particular job group. If `jobGroup` is `null`, then + * returns all known jobs that are not associated with a job group. + * + * The returned list may contain running, failed, and completed jobs, and may vary across + * invocations of this method. This method does not guarantee the order of the elements in + * its result. + */ + def getJobIdsForGroup(jobGroup: String): Array[Int] = sc.statusTracker.getJobIdsForGroup(jobGroup) + + /** + * Returns an array containing the ids of all active stages. + * + * This method does not guarantee the order of the elements in its result. + */ + def getActiveStageIds(): Array[Int] = sc.statusTracker.getActiveStageIds() + + /** + * Returns an array containing the ids of all active jobs. + * + * This method does not guarantee the order of the elements in its result. + */ + def getActiveJobIds(): Array[Int] = sc.statusTracker.getActiveJobIds() + + /** + * Returns job information, or `null` if the job info could not be found or was garbage collected. + */ + def getJobInfo(jobId: Int): SparkJobInfo = sc.statusTracker.getJobInfo(jobId).orNull + + /** + * Returns stage information, or `null` if the stage info could not be found or was + * garbage collected. 
+ */ + def getStageInfo(stageId: Int): SparkStageInfo = sc.statusTracker.getStageInfo(stageId).orNull +} diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala index b52d0a5028e84..71b26737b8c02 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala @@ -19,7 +19,8 @@ package org.apache.spark.api.java import com.google.common.base.Optional -import scala.collection.convert.Wrappers.MapWrapper +import java.{util => ju} +import scala.collection.mutable private[spark] object JavaUtils { def optionToOptional[T](option: Option[T]): Optional[T] = @@ -32,7 +33,64 @@ private[spark] object JavaUtils { def mapAsSerializableJavaMap[A, B](underlying: collection.Map[A, B]) = new SerializableMapWrapper(underlying) + // Implementation is copied from scala.collection.convert.Wrappers.MapWrapper, + // but implements java.io.Serializable. It can't just be subclassed to make it + // Serializable since the MapWrapper class has no no-arg constructor. This class + // doesn't need a no-arg constructor though. class SerializableMapWrapper[A, B](underlying: collection.Map[A, B]) - extends MapWrapper(underlying) with java.io.Serializable + extends ju.AbstractMap[A, B] with java.io.Serializable { self => + override def size = underlying.size + + override def get(key: AnyRef): B = try { + underlying get key.asInstanceOf[A] match { + case None => null.asInstanceOf[B] + case Some(v) => v + } + } catch { + case ex: ClassCastException => null.asInstanceOf[B] + } + + override def entrySet: ju.Set[ju.Map.Entry[A, B]] = new ju.AbstractSet[ju.Map.Entry[A, B]] { + def size = self.size + + def iterator = new ju.Iterator[ju.Map.Entry[A, B]] { + val ui = underlying.iterator + var prev : Option[A] = None + + def hasNext = ui.hasNext + + def next() = { + val (k, v) = ui.next + prev = Some(k) + new ju.Map.Entry[A, B] { + import scala.util.hashing.byteswap32 + def getKey = k + def getValue = v + def setValue(v1 : B) = self.put(k, v1) + override def hashCode = byteswap32(k.hashCode) + (byteswap32(v.hashCode) << 16) + override def equals(other: Any) = other match { + case e: ju.Map.Entry[_, _] => k == e.getKey && v == e.getValue + case _ => false + } + } + } + + def remove() { + prev match { + case Some(k) => + underlying match { + case mm: mutable.Map[A, _] => + mm remove k + prev = None + case _ => + throw new UnsupportedOperationException("remove") + } + case _ => + throw new IllegalStateException("next must be called at least once before remove") + } + } + } + } + } } diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala index 49dc95f349eac..c9181a29d4756 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala @@ -61,8 +61,7 @@ private[python] object Converter extends Logging { * Other objects are passed through without conversion. 
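The point of `SerializableMapWrapper` above is that the stock `MapWrapper` returned by the Scala/Java converters is not `java.io.Serializable`. A hedged helper for checking that property via a Java-serialization round trip (names are illustrative):

{{{
import java.io._

def roundTrip[T <: Serializable](value: T): T = {
  val buffer = new ByteArrayOutputStream()
  val out = new ObjectOutputStream(buffer)
  out.writeObject(value)
  out.close()
  val in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
  in.readObject().asInstanceOf[T]
}
// e.g. roundTrip(wrappedMap).get("key") should return the original value.
}}}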
*/ private[python] class WritableToJavaConverter( - conf: Broadcast[SerializableWritable[Configuration]], - batchSize: Int) extends Converter[Any, Any] { + conf: Broadcast[SerializableWritable[Configuration]]) extends Converter[Any, Any] { /** * Converts a [[org.apache.hadoop.io.Writable]] to the underlying primitive, String or @@ -94,8 +93,7 @@ private[python] class WritableToJavaConverter( map.put(convertWritable(k), convertWritable(v)) } map - case w: Writable => - if (batchSize > 1) WritableUtils.clone(w, conf.value.value) else w + case w: Writable => WritableUtils.clone(w, conf.value.value) case other => other } } @@ -140,6 +138,11 @@ private[python] class JavaToWritableConverter extends Converter[Any, Writable] { mapWritable.put(convertToWritable(k), convertToWritable(v)) } mapWritable + case array: Array[Any] => { + val arrayWriteable = new ArrayWritable(classOf[Writable]) + arrayWriteable.set(array.map(convertToWritable(_))) + arrayWriteable + } case other => throw new SparkException( s"Data of type ${other.getClass.getName} cannot be used") } diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 29ca751519abd..b89effc16d36d 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -19,22 +19,21 @@ package org.apache.spark.api.python import java.io._ import java.net._ -import java.nio.charset.Charset -import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collections} +import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, UUID, Collections} + +import org.apache.spark.input.PortableDataStream import scala.collection.JavaConversions._ -import scala.collection.JavaConverters._ import scala.collection.mutable import scala.language.existentials -import net.razorvine.pickle.{Pickler, Unpickler} +import com.google.common.base.Charsets.UTF_8 import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.{InputFormat, OutputFormat, JobConf} import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, OutputFormat => NewOutputFormat} import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD @@ -47,7 +46,7 @@ private[spark] class PythonRDD( pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, - broadcastVars: JList[Broadcast[Array[Byte]]], + broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]]) extends RDD[Array[Byte]](parent) { @@ -68,16 +67,16 @@ private[spark] class PythonRDD( envVars += ("SPARK_REUSE_WORKER" -> "1") } val worker: Socket = env.createPythonWorker(pythonExec, envVars.toMap) + // Whether is the worker released into idle pool + @volatile var released = false // Start a thread to feed the process input from our parent's iterator val writerThread = new WriterThread(env, worker, split, context) - var complete_cleanly = false context.addTaskCompletionListener { context => writerThread.shutdownOnTaskCompletion() - if (reuse_worker && complete_cleanly) { - env.releasePythonWorker(pythonExec, envVars.toMap, worker) - } else { + writerThread.join() + if (!reuse_worker || !released) { try { worker.close() } catch { @@ -125,15 +124,15 @@ private[spark] class PythonRDD( init, 
finish)) val memoryBytesSpilled = stream.readLong() val diskBytesSpilled = stream.readLong() - context.taskMetrics.memoryBytesSpilled += memoryBytesSpilled - context.taskMetrics.diskBytesSpilled += diskBytesSpilled + context.taskMetrics.incMemoryBytesSpilled(memoryBytesSpilled) + context.taskMetrics.incDiskBytesSpilled(diskBytesSpilled) read() case SpecialLengths.PYTHON_EXCEPTION_THROWN => // Signals that an exception has been thrown in python val exLength = stream.readInt() val obj = new Array[Byte](exLength) stream.readFully(obj) - throw new PythonException(new String(obj, "utf-8"), + throw new PythonException(new String(obj, UTF_8), writerThread.exception.getOrElse(null)) case SpecialLengths.END_OF_DATA_SECTION => // We've finished the data section of the output, but we can still @@ -145,7 +144,13 @@ private[spark] class PythonRDD( stream.readFully(update) accumulator += Collections.singletonList(update) } - complete_cleanly = true + // Check whether the worker is ready to be re-used. + if (stream.readInt() == SpecialLengths.END_OF_STREAM) { + if (reuse_worker) { + env.releasePythonWorker(pythonExec, envVars.toMap, worker) + released = true + } + } null } } catch { @@ -154,6 +159,10 @@ private[spark] class PythonRDD( logDebug("Exception thrown after task interruption", e) throw new TaskKilledException + case e: Exception if env.isStopped => + logDebug("Exception thrown after context is stopped", e) + null // exit silently + case e: Exception if writerThread.exception.isDefined => logError("Python worker exited unexpectedly (crashed)", e) logError("This may have been caused by a prior exception:", writerThread.exception.get) @@ -223,8 +232,7 @@ private[spark] class PythonRDD( if (!oldBids.contains(broadcast.id)) { // send new broadcast dataOut.writeLong(broadcast.id) - dataOut.writeInt(broadcast.value.length) - dataOut.write(broadcast.value) + PythonRDD.writeUTF(broadcast.value.path, dataOut) oldBids.add(broadcast.id) } } @@ -235,6 +243,7 @@ private[spark] class PythonRDD( // Data values PythonRDD.writeIteratorToStream(firstParent.iterator(split, context), dataOut) dataOut.writeInt(SpecialLengths.END_OF_DATA_SECTION) + dataOut.writeInt(SpecialLengths.END_OF_STREAM) dataOut.flush() } catch { case e: Exception if context.isCompleted || context.isInterrupted => @@ -306,10 +315,11 @@ private object SpecialLengths { val END_OF_DATA_SECTION = -1 val PYTHON_EXCEPTION_THROWN = -2 val TIMING_DATA = -3 + val END_OF_STREAM = -4 + val NULL = -5 } private[spark] object PythonRDD extends Logging { - val UTF8 = Charset.forName("UTF-8") // remember the broadcasts sent to each worker private val workerBroadcasts = new mutable.WeakHashMap[Socket, mutable.Set[Long]]() @@ -360,56 +370,30 @@ private[spark] object PythonRDD extends Logging { } } - def readBroadcastFromFile(sc: JavaSparkContext, filename: String): Broadcast[Array[Byte]] = { - val file = new DataInputStream(new FileInputStream(filename)) - try { - val length = file.readInt() - val obj = new Array[Byte](length) - file.readFully(obj) - sc.broadcast(obj) - } finally { - file.close() - } + def readBroadcastFromFile(sc: JavaSparkContext, path: String): Broadcast[PythonBroadcast] = { + sc.broadcast(new PythonBroadcast(path)) } def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) { - // The right way to implement this would be to use TypeTags to get the full - // type of T. 
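For orientation, a hedged sketch of the length-prefixed framing used on the socket to the Python worker: payloads are written as (length, bytes), and the negative lengths defined in `SpecialLengths` below act as control markers:

{{{
import java.io.{ByteArrayOutputStream, DataOutputStream}

val buffer  = new ByteArrayOutputStream()
val dataOut = new DataOutputStream(buffer)

def writeBytes(arr: Array[Byte]): Unit = {
  dataOut.writeInt(arr.length)   // non-negative length prefix
  dataOut.write(arr)
}

writeBytes("hello".getBytes("utf-8"))
dataOut.writeInt(-1)   // END_OF_DATA_SECTION
dataOut.writeInt(-4)   // END_OF_STREAM marker (also echoed back by a cleanly finished worker)
dataOut.flush()
}}}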
Since I don't want to introduce breaking changes throughout the - // entire Spark API, I have to use this hacky approach: - if (iter.hasNext) { - val first = iter.next() - val newIter = Seq(first).iterator ++ iter - first match { - case arr: Array[Byte] => - newIter.asInstanceOf[Iterator[Array[Byte]]].foreach { bytes => - dataOut.writeInt(bytes.length) - dataOut.write(bytes) - } - case string: String => - newIter.asInstanceOf[Iterator[String]].foreach { str => - writeUTF(str, dataOut) - } - case pair: Tuple2[_, _] => - pair._1 match { - case bytePair: Array[Byte] => - newIter.asInstanceOf[Iterator[Tuple2[Array[Byte], Array[Byte]]]].foreach { pair => - dataOut.writeInt(pair._1.length) - dataOut.write(pair._1) - dataOut.writeInt(pair._2.length) - dataOut.write(pair._2) - } - case stringPair: String => - newIter.asInstanceOf[Iterator[Tuple2[String, String]]].foreach { pair => - writeUTF(pair._1, dataOut) - writeUTF(pair._2, dataOut) - } - case other => - throw new SparkException("Unexpected Tuple2 element type " + pair._1.getClass) - } - case other => - throw new SparkException("Unexpected element type " + first.getClass) - } + + def write(obj: Any): Unit = obj match { + case null => + dataOut.writeInt(SpecialLengths.NULL) + case arr: Array[Byte] => + dataOut.writeInt(arr.length) + dataOut.write(arr) + case str: String => + writeUTF(str, dataOut) + case stream: PortableDataStream => + write(stream.toArray()) + case (key, value) => + write(key) + write(value) + case other => + throw new SparkException("Unexpected element type " + other.getClass) } + + iter.foreach(write) } /** @@ -434,7 +418,7 @@ private[spark] object PythonRDD extends Logging { val rdd = sc.sc.sequenceFile[K, V](path, kc, vc, minSplits) val confBroadcasted = sc.sc.broadcast(new SerializableWritable(sc.hadoopConfiguration())) val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, - new WritableToJavaConverter(confBroadcasted, batchSize)) + new WritableToJavaConverter(confBroadcasted)) JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize)) } @@ -460,7 +444,7 @@ private[spark] object PythonRDD extends Logging { Some(path), inputFormatClass, keyClass, valueClass, mergedConf) val confBroadcasted = sc.sc.broadcast(new SerializableWritable(mergedConf)) val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, - new WritableToJavaConverter(confBroadcasted, batchSize)) + new WritableToJavaConverter(confBroadcasted)) JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize)) } @@ -486,7 +470,7 @@ private[spark] object PythonRDD extends Logging { None, inputFormatClass, keyClass, valueClass, conf) val confBroadcasted = sc.sc.broadcast(new SerializableWritable(conf)) val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, - new WritableToJavaConverter(confBroadcasted, batchSize)) + new WritableToJavaConverter(confBroadcasted)) JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize)) } @@ -529,7 +513,7 @@ private[spark] object PythonRDD extends Logging { Some(path), inputFormatClass, keyClass, valueClass, mergedConf) val confBroadcasted = sc.sc.broadcast(new SerializableWritable(mergedConf)) val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, - new WritableToJavaConverter(confBroadcasted, batchSize)) + new WritableToJavaConverter(confBroadcasted)) JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize)) } @@ -555,7 +539,7 @@ private[spark] object PythonRDD extends Logging { None, inputFormatClass, keyClass, valueClass, conf) val 
confBroadcasted = sc.sc.broadcast(new SerializableWritable(conf)) val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, - new WritableToJavaConverter(confBroadcasted, batchSize)) + new WritableToJavaConverter(confBroadcasted)) JavaRDD.fromRDD(SerDeUtil.pairRDDToPython(converted, batchSize)) } @@ -577,7 +561,7 @@ private[spark] object PythonRDD extends Logging { } def writeUTF(str: String, dataOut: DataOutputStream) { - val bytes = str.getBytes(UTF8) + val bytes = str.getBytes(UTF_8) dataOut.writeInt(bytes.length) dataOut.write(bytes) } @@ -738,109 +722,11 @@ private[spark] object PythonRDD extends Logging { converted.saveAsHadoopDataset(new JobConf(conf)) } } - - - /** - * Convert an RDD of serialized Python dictionaries to Scala Maps (no recursive conversions). - */ - @deprecated("PySpark does not use it anymore", "1.1") - def pythonToJavaMap(pyRDD: JavaRDD[Array[Byte]]): JavaRDD[Map[String, _]] = { - pyRDD.rdd.mapPartitions { iter => - val unpickle = new Unpickler - SerDeUtil.initialize() - iter.flatMap { row => - unpickle.loads(row) match { - // in case of objects are pickled in batch mode - case objs: JArrayList[JMap[String, _] @unchecked] => objs.map(_.toMap) - // not in batch mode - case obj: JMap[String @unchecked, _] => Seq(obj.toMap) - } - } - } - } - - /** - * Convert an RDD of serialized Python tuple to Array (no recursive conversions). - * It is only used by pyspark.sql. - */ - def pythonToJavaArray(pyRDD: JavaRDD[Array[Byte]], batched: Boolean): JavaRDD[Array[_]] = { - - def toArray(obj: Any): Array[_] = { - obj match { - case objs: JArrayList[_] => - objs.toArray - case obj if obj.getClass.isArray => - obj.asInstanceOf[Array[_]].toArray - } - } - - pyRDD.rdd.mapPartitions { iter => - val unpickle = new Unpickler - iter.flatMap { row => - val obj = unpickle.loads(row) - if (batched) { - obj.asInstanceOf[JArrayList[_]].map(toArray) - } else { - Seq(toArray(obj)) - } - } - }.toJavaRDD() - } - - private[spark] class AutoBatchedPickler(iter: Iterator[Any]) extends Iterator[Array[Byte]] { - private val pickle = new Pickler() - private var batch = 1 - private val buffer = new mutable.ArrayBuffer[Any] - - override def hasNext(): Boolean = iter.hasNext - - override def next(): Array[Byte] = { - while (iter.hasNext && buffer.length < batch) { - buffer += iter.next() - } - val bytes = pickle.dumps(buffer.toArray) - val size = bytes.length - // let 1M < size < 10M - if (size < 1024 * 1024) { - batch *= 2 - } else if (size > 1024 * 1024 * 10 && batch > 1) { - batch /= 2 - } - buffer.clear() - bytes - } - } - - /** - * Convert an RDD of Java objects to an RDD of serialized Python objects, that is usable by - * PySpark. - */ - def javaToPython(jRDD: JavaRDD[Any]): JavaRDD[Array[Byte]] = { - jRDD.rdd.mapPartitions { iter => new AutoBatchedPickler(iter) } - } - - /** - * Convert an RDD of serialized Python objects to RDD of objects, that is usable by PySpark. 
- */ - def pythonToJava(pyRDD: JavaRDD[Array[Byte]], batched: Boolean): JavaRDD[Any] = { - pyRDD.rdd.mapPartitions { iter => - SerDeUtil.initialize() - val unpickle = new Unpickler - iter.flatMap { row => - val obj = unpickle.loads(row) - if (batched) { - obj.asInstanceOf[JArrayList[_]].asScala - } else { - Seq(obj) - } - } - }.toJavaRDD() - } } private class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] { - override def call(arr: Array[Byte]) : String = new String(arr, PythonRDD.UTF8) + override def call(arr: Array[Byte]) : String = new String(arr, UTF_8) } /** @@ -895,3 +781,49 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort: } } } + +/** + * An Wrapper for Python Broadcast, which is written into disk by Python. It also will + * write the data into disk after deserialization, then Python can read it from disks. + */ +private[spark] class PythonBroadcast(@transient var path: String) extends Serializable { + + /** + * Read data from disks, then copy it to `out` + */ + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { + val in = new FileInputStream(new File(path)) + try { + Utils.copyStream(in, out) + } finally { + in.close() + } + } + + /** + * Write data into disk, using randomly generated name. + */ + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { + val dir = new File(Utils.getLocalDir(SparkEnv.get.conf)) + val file = File.createTempFile("broadcast", "", dir) + path = file.getAbsolutePath + val out = new FileOutputStream(file) + try { + Utils.copyStream(in, out) + } finally { + out.close() + } + } + + /** + * Delete the file once the object is GCed. + */ + override def finalize() { + if (!path.isEmpty) { + val file = new File(path) + if (file.exists()) { + file.delete() + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index be5ebfa9219d3..acbaba6791850 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -17,11 +17,14 @@ package org.apache.spark.api.python -import java.io.{File, InputStream, IOException, OutputStream} +import java.io.{File} +import java.util.{List => JList} +import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkContext +import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} private[spark] object PythonUtils { /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ @@ -39,4 +42,15 @@ private[spark] object PythonUtils { def mergePythonPaths(paths: String*): String = { paths.filter(_ != "").mkString(File.pathSeparator) } + + def generateRDDWithNull(sc: JavaSparkContext): JavaRDD[String] = { + sc.parallelize(List("a", null, "b")) + } + + /** + * Convert list of T into seq of T (for calling API with varargs) + */ + def toSeq[T](cols: JList[T]): Seq[T] = { + cols.toList.toSeq + } } diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index ebdc3533e0992..fb52a960e0765 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -18,8 +18,13 @@ package org.apache.spark.api.python import java.nio.ByteOrder +import java.util.{ArrayList => JArrayList} + 
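`PythonBroadcast` above illustrates the `writeObject`/`readObject` hook pattern: the payload lives in a local file, is streamed into the serialization stream on the driver, and is re-materialized into a temp file on each executor. A hedged, generic sketch of the same pattern (the class and file-prefix names are made up, and a length prefix is added for explicit framing):

{{{
import java.io._

class FileBackedHandle(@transient var path: String) extends Serializable {

  private def writeObject(out: ObjectOutputStream): Unit = {
    val file = new File(path)
    out.writeLong(file.length)                  // frame the payload explicitly
    val in = new FileInputStream(file)
    try {
      val buf = new Array[Byte](8192)
      var n = in.read(buf)
      while (n != -1) { out.write(buf, 0, n); n = in.read(buf) }
    } finally in.close()
  }

  private def readObject(in: ObjectInputStream): Unit = {
    var remaining = in.readLong()
    val file = File.createTempFile("handle", "")
    path = file.getAbsolutePath
    val fos = new FileOutputStream(file)
    try {
      val buf = new Array[Byte](8192)
      while (remaining > 0) {
        val n = in.read(buf, 0, math.min(buf.length.toLong, remaining).toInt)
        if (n < 0) throw new EOFException("truncated payload")
        fos.write(buf, 0, n)
        remaining -= n
      }
    } finally fos.close()
  }
}
}}}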
+import org.apache.spark.api.java.JavaRDD import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.util.Failure import scala.util.Try @@ -89,6 +94,76 @@ private[spark] object SerDeUtil extends Logging { } initialize() + + /** + * Convert an RDD of Java objects to Array (no recursive conversions). + * It is only used by pyspark.sql. + */ + def toJavaArray(jrdd: JavaRDD[Any]): JavaRDD[Array[_]] = { + jrdd.rdd.map { + case objs: JArrayList[_] => + objs.toArray + case obj if obj.getClass.isArray => + obj.asInstanceOf[Array[_]].toArray + }.toJavaRDD() + } + + /** + * Choose batch size based on size of objects + */ + private[spark] class AutoBatchedPickler(iter: Iterator[Any]) extends Iterator[Array[Byte]] { + private val pickle = new Pickler() + private var batch = 1 + private val buffer = new mutable.ArrayBuffer[Any] + + override def hasNext: Boolean = iter.hasNext + + override def next(): Array[Byte] = { + while (iter.hasNext && buffer.length < batch) { + buffer += iter.next() + } + val bytes = pickle.dumps(buffer.toArray) + val size = bytes.length + // let 1M < size < 10M + if (size < 1024 * 1024) { + batch *= 2 + } else if (size > 1024 * 1024 * 10 && batch > 1) { + batch /= 2 + } + buffer.clear() + bytes + } + } + + /** + * Convert an RDD of Java objects to an RDD of serialized Python objects, that is usable by + * PySpark. + */ + private[spark] def javaToPython(jRDD: JavaRDD[_]): JavaRDD[Array[Byte]] = { + jRDD.rdd.mapPartitions { iter => new AutoBatchedPickler(iter) } + } + + /** + * Convert an RDD of serialized Python objects to RDD of objects, that is usable by PySpark. + */ + def pythonToJava(pyRDD: JavaRDD[Array[Byte]], batched: Boolean): JavaRDD[Any] = { + pyRDD.rdd.mapPartitions { iter => + initialize() + val unpickle = new Unpickler + iter.flatMap { row => + val obj = unpickle.loads(row) + if (batched) { + obj match { + case array: Array[Any] => array.toSeq + case _ => obj.asInstanceOf[JArrayList[_]].asScala + } + } else { + Seq(obj) + } + } + }.toJavaRDD() + } + private def checkPickle(t: (Any, Any)): (Boolean, Boolean) = { val pickle = new Pickler val kt = Try { @@ -127,18 +202,22 @@ private[spark] object SerDeUtil extends Logging { * representation is serialized */ def pairRDDToPython(rdd: RDD[(Any, Any)], batchSize: Int): RDD[Array[Byte]] = { - val (keyFailed, valueFailed) = checkPickle(rdd.first()) + val (keyFailed, valueFailed) = rdd.take(1) match { + case Array() => (false, false) + case Array(first) => checkPickle(first) + } + rdd.mapPartitions { iter => - val pickle = new Pickler val cleaned = iter.map { case (k, v) => val key = if (keyFailed) k.toString else k val value = if (valueFailed) v.toString else v Array[Any](key, value) } - if (batchSize > 1) { - cleaned.grouped(batchSize).map(batched => pickle.dumps(seqAsJavaList(batched))) + if (batchSize == 0) { + new AutoBatchedPickler(cleaned) } else { - cleaned.map(pickle.dumps(_)) + val pickle = new Pickler + cleaned.grouped(batchSize).map(batched => pickle.dumps(seqAsJavaList(batched))) } } } @@ -146,36 +225,24 @@ private[spark] object SerDeUtil extends Logging { /** * Convert an RDD of serialized Python tuple (K, V) to RDD[(K, V)]. 
*/ - def pythonToPairRDD[K, V](pyRDD: RDD[Array[Byte]], batchSerialized: Boolean): RDD[(K, V)] = { + def pythonToPairRDD[K, V](pyRDD: RDD[Array[Byte]], batched: Boolean): RDD[(K, V)] = { def isPair(obj: Any): Boolean = { - Option(obj.getClass.getComponentType).map(!_.isPrimitive).getOrElse(false) && + Option(obj.getClass.getComponentType).exists(!_.isPrimitive) && obj.asInstanceOf[Array[_]].length == 2 } - pyRDD.mapPartitions { iter => - initialize() - val unpickle = new Unpickler - val unpickled = - if (batchSerialized) { - iter.flatMap { batch => - unpickle.loads(batch) match { - case objs: java.util.List[_] => collectionAsScalaIterable(objs) - case other => throw new SparkException( - s"Unexpected type ${other.getClass.getName} for batch serialized Python RDD") - } - } - } else { - iter.map(unpickle.loads(_)) - } - unpickled.map { - case obj if isPair(obj) => - // we only accept (K, V) - val arr = obj.asInstanceOf[Array[_]] - (arr.head.asInstanceOf[K], arr.last.asInstanceOf[V]) - case other => throw new SparkException( - s"RDD element of type ${other.getClass.getName} cannot be used") - } + + val rdd = pythonToJava(pyRDD, batched).rdd + rdd.take(1) match { + case Array(obj) if isPair(obj) => + // we only accept (K, V) + case Array() => + // we also accept empty collections + case Array(other) => throw new SparkException( + s"RDD element of type ${other.getClass.getName} cannot be used") + } + rdd.map { obj => + val arr = obj.asInstanceOf[Array[_]] + (arr.head.asInstanceOf[K], arr.last.asInstanceOf[V]) } } - } - diff --git a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala index d11db978b842e..cf289fb3ae39f 100644 --- a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala +++ b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala @@ -18,7 +18,8 @@ package org.apache.spark.api.python import java.io.{DataOutput, DataInput} -import java.nio.charset.Charset + +import com.google.common.base.Charsets.UTF_8 import org.apache.hadoop.io._ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat @@ -106,7 +107,6 @@ private[python] class WritableToDoubleArrayConverter extends Converter[Any, Arra * given directory (probably a temp directory) */ object WriteInputFormatTestDataGenerator { - import SparkContext._ def main(args: Array[String]) { val path = args(0) @@ -136,7 +136,7 @@ object WriteInputFormatTestDataGenerator { sc.parallelize(intKeys).saveAsSequenceFile(intPath) sc.parallelize(intKeys.map{ case (k, v) => (k.toDouble, v) }).saveAsSequenceFile(doublePath) sc.parallelize(intKeys.map{ case (k, v) => (k.toString, v) }).saveAsSequenceFile(textPath) - sc.parallelize(intKeys.map{ case (k, v) => (k, v.getBytes(Charset.forName("UTF-8"))) } + sc.parallelize(intKeys.map{ case (k, v) => (k, v.getBytes(UTF_8)) } ).saveAsSequenceFile(bytesPath) val bools = Seq((1, true), (2, true), (2, false), (3, true), (2, false), (1, false)) sc.parallelize(bools).saveAsSequenceFile(boolPath) @@ -175,11 +175,11 @@ object WriteInputFormatTestDataGenerator { // Create test data for arbitrary custom writable TestWritable val testClass = Seq( - ("1", TestWritable("test1", 123, 54.0)), - ("2", TestWritable("test2", 456, 8762.3)), - ("1", TestWritable("test3", 123, 423.1)), - ("3", TestWritable("test56", 456, 423.5)), - ("2", TestWritable("test2", 123, 5435.2)) + ("1", TestWritable("test1", 1, 1.0)), + ("2", 
TestWritable("test2", 2, 2.3)), + ("3", TestWritable("test3", 3, 3.1)), + ("5", TestWritable("test56", 5, 5.5)), + ("4", TestWritable("test4", 4, 4.2)) ) val rdd = sc.parallelize(testClass, numSlices = 2).map{ case (k, v) => (new Text(k), v) } rdd.saveAsNewAPIHadoopFile(classPath, diff --git a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index 15fd30e65761d..a5ea478f231d7 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -20,6 +20,8 @@ package org.apache.spark.broadcast import java.io.Serializable import org.apache.spark.SparkException +import org.apache.spark.Logging +import org.apache.spark.util.Utils import scala.reflect.ClassTag @@ -37,7 +39,7 @@ import scala.reflect.ClassTag * * {{{ * scala> val broadcastVar = sc.broadcast(Array(1, 2, 3)) - * broadcastVar: spark.Broadcast[Array[Int]] = spark.Broadcast(b5c40191-a864-4c7d-b9bf-d87e1a4e787c) + * broadcastVar: org.apache.spark.broadcast.Broadcast[Array[Int]] = Broadcast(0) * * scala> broadcastVar.value * res0: Array[Int] = Array(1, 2, 3) @@ -52,7 +54,7 @@ import scala.reflect.ClassTag * @param id A unique identifier for the broadcast variable. * @tparam T Type of the data contained in the broadcast variable. */ -abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable { +abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Logging { /** * Flag signifying whether the broadcast variable is valid @@ -60,6 +62,8 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable { */ @volatile private var _isValid = true + private var _destroySite = "" + /** Get the broadcasted value. */ def value: T = { assertValid() @@ -84,13 +88,26 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable { doUnpersist(blocking) } + + /** + * Destroy all data and metadata related to this broadcast variable. Use this with caution; + * once a broadcast variable has been destroyed, it cannot be used again. + * This method blocks until destroy has completed + */ + def destroy() { + destroy(blocking = true) + } + /** * Destroy all data and metadata related to this broadcast variable. Use this with caution; * once a broadcast variable has been destroyed, it cannot be used again. + * @param blocking Whether to block until destroy has completed */ private[spark] def destroy(blocking: Boolean) { assertValid() _isValid = false + _destroySite = Utils.getCallSite().shortForm + logInfo("Destroying %s (from %s)".format(toString, _destroySite)) doDestroy(blocking) } @@ -124,7 +141,8 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable { /** Check if this broadcast is valid. If not valid, exception is thrown. 
*/ protected def assertValid() { if (!_isValid) { - throw new SparkException("Attempted to use %s after it has been destroyed!".format(toString)) + throw new SparkException( + "Attempted to use %s after it was destroyed (%s) ".format(toString, _destroySite)) } } diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala index 4cd4f4f96fd16..1444c0dd3d2d6 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -72,13 +72,13 @@ private[spark] class HttpBroadcast[T: ClassTag]( } /** Used by the JVM when serializing this object. */ - private def writeObject(out: ObjectOutputStream) { + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { assertValid() out.defaultWriteObject() } /** Used by the JVM when deserializing this object. */ - private def readObject(in: ObjectInputStream) { + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() HttpBroadcast.synchronized { SparkEnv.get.blockManager.getSingle(blockId) match { @@ -151,9 +151,10 @@ private[broadcast] object HttpBroadcast extends Logging { } private def createServer(conf: SparkConf) { - broadcastDir = Utils.createTempDir(Utils.getLocalDir(conf)) + broadcastDir = Utils.createTempDir(Utils.getLocalDir(conf), "broadcast") val broadcastPort = conf.getInt("spark.broadcast.port", 0) - server = new HttpServer(broadcastDir, securityManager, broadcastPort, "HTTP broadcast server") + server = + new HttpServer(conf, broadcastDir, securityManager, broadcastPort, "HTTP broadcast server") server.start() serverUri = server.uri logInfo("Broadcast server started at " + serverUri) @@ -191,11 +192,14 @@ private[broadcast] object HttpBroadcast extends Logging { logDebug("broadcast security enabled") val newuri = Utils.constructURIForAuthentication(new URI(url), securityManager) uc = newuri.toURL.openConnection() + uc.setConnectTimeout(httpReadTimeout) uc.setAllowUserInteraction(false) } else { logDebug("broadcast not using security") uc = new URL(url).openConnection() + uc.setConnectTimeout(httpReadTimeout) } + Utils.setupSecureURLConnection(uc, securityManager) val in = { uc.setReadTimeout(httpReadTimeout) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 99af2e9608ea7..94142d33369c7 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -28,7 +28,7 @@ import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException} import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.ByteBufferInputStream +import org.apache.spark.util.{ByteBufferInputStream, Utils} import org.apache.spark.util.io.ByteArrayChunkOutputStream /** @@ -56,11 +56,13 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) extends Broadcast[T](id) with Logging with Serializable { /** - * Value of the broadcast object. On driver, this is set directly by the constructor. - * On executors, this is reconstructed by [[readObject]], which builds this value by reading - * blocks from the driver and/or other executors. + * Value of the broadcast object on executors. 
This is reconstructed by [[readBroadcastBlock]], + * which builds this value by reading blocks from the driver and/or other executors. + * + * On the driver, if the value is required, it is read lazily from the block manager. */ - @transient private var _value: T = obj + @transient private lazy val _value: T = readBroadcastBlock() + /** The compression codec to use, or None if compression is disabled */ @transient private var compressionCodec: Option[CompressionCodec] = _ /** Size of each block. Default value is 4MB. This value is only read by the broadcaster. */ @@ -79,22 +81,24 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) private val broadcastId = BroadcastBlockId(id) /** Total number of blocks this broadcast variable contains. */ - private val numBlocks: Int = writeBlocks() + private val numBlocks: Int = writeBlocks(obj) - override protected def getValue() = _value + override protected def getValue() = { + _value + } /** * Divide the object into multiple blocks and put those blocks in the block manager. - * + * @param value the object to divide * @return number of blocks this broadcast variable is divided into */ - private def writeBlocks(): Int = { + private def writeBlocks(value: T): Int = { // Store a copy of the broadcast variable in the driver so that tasks run on the driver // do not create a duplicate copy of the broadcast variable's value. - SparkEnv.get.blockManager.putSingle(broadcastId, _value, StorageLevel.MEMORY_AND_DISK, + SparkEnv.get.blockManager.putSingle(broadcastId, value, StorageLevel.MEMORY_AND_DISK, tellMaster = false) val blocks = - TorrentBroadcast.blockifyObject(_value, blockSize, SparkEnv.get.serializer, compressionCodec) + TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) blocks.zipWithIndex.foreach { case (block, i) => SparkEnv.get.blockManager.putBytes( BroadcastBlockId(id, "piece" + i), @@ -152,36 +156,35 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) } /** Used by the JVM when serializing this object. */ - private def writeObject(out: ObjectOutputStream) { + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { assertValid() out.defaultWriteObject() } - /** Used by the JVM when deserializing this object. */ - private def readObject(in: ObjectInputStream) { - in.defaultReadObject() + private def readBroadcastBlock(): T = Utils.tryOrIOException { TorrentBroadcast.synchronized { setConf(SparkEnv.get.conf) SparkEnv.get.blockManager.getLocal(broadcastId).map(_.data.next()) match { case Some(x) => - _value = x.asInstanceOf[T] + x.asInstanceOf[T] case None => logInfo("Started reading broadcast variable " + id) - val start = System.nanoTime() + val startTimeMs = System.currentTimeMillis() val blocks = readBlocks() - val time = (System.nanoTime() - start) / 1e9 - logInfo("Reading broadcast variable " + id + " took " + time + " s") + logInfo("Reading broadcast variable " + id + " took" + Utils.getUsedTimeMs(startTimeMs)) - _value = - TorrentBroadcast.unBlockifyObject[T](blocks, SparkEnv.get.serializer, compressionCodec) + val obj = TorrentBroadcast.unBlockifyObject[T]( + blocks, SparkEnv.get.serializer, compressionCodec) // Store the merged copy in BlockManager so other tasks on this executor don't // need to re-fetch it. 
SparkEnv.get.blockManager.putSingle( - broadcastId, _value, StorageLevel.MEMORY_AND_DISK, tellMaster = false) + broadcastId, obj, StorageLevel.MEMORY_AND_DISK, tellMaster = false) + obj } } } + } diff --git a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala index 65a1a8fd7e929..ae55b4ff40b74 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala @@ -28,5 +28,14 @@ private[spark] class ApplicationDescription( val user = System.getProperty("user.name", "") + def copy( + name: String = name, + maxCores: Option[Int] = maxCores, + memoryPerSlave: Int = memoryPerSlave, + command: Command = command, + appUiUrl: String = appUiUrl, + eventLogDir: Option[String] = eventLogDir): ApplicationDescription = + new ApplicationDescription(name, maxCores, memoryPerSlave, command, appUiUrl, eventLogDir) + override def toString: String = "ApplicationDescription(" + name + ")" } diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index f2687ce6b42b4..38b3da0b13756 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -39,7 +39,8 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) val timeout = AkkaUtils.askTimeout(conf) override def preStart() = { - masterActor = context.actorSelection(Master.toAkkaUrl(driverArgs.master)) + masterActor = context.actorSelection( + Master.toAkkaUrl(driverArgs.master, AkkaUtils.protocol(context.system))) context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) @@ -160,6 +161,8 @@ object Client { val (actorSystem, _) = AkkaUtils.createActorSystem( "driverClient", Utils.localHostName(), 0, conf, new SecurityManager(conf)) + // Verify driverArgs.master is a valid url so that we can use it in ClientActor safely + Master.toAkkaUrl(driverArgs.master, AkkaUtils.protocol(actorSystem)) actorSystem.actorOf(Props(classOf[ClientActor], driverArgs, conf)) actorSystem.awaitTermination() diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 39150deab863c..e5873ce724b9f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -17,11 +17,13 @@ package org.apache.spark.deploy +import java.net.{URI, URISyntaxException} + import scala.collection.mutable.ListBuffer import org.apache.log4j.Level -import org.apache.spark.util.MemoryParam +import org.apache.spark.util.{IntParam, MemoryParam} /** * Command-line parser for the driver client. @@ -49,8 +51,8 @@ private[spark] class ClientArguments(args: Array[String]) { parse(args.toList) def parse(args: List[String]): Unit = args match { - case ("--cores" | "-c") :: value :: tail => - cores = value.toInt + case ("--cores" | "-c") :: IntParam(value) :: tail => + cores = value parse(tail) case ("--memory" | "-m") :: MemoryParam(value) :: tail => @@ -73,7 +75,8 @@ private[spark] class ClientArguments(args: Array[String]) { if (!ClientArguments.isValidJarUrl(_jarUrl)) { println(s"Jar url '${_jarUrl}' is not in valid format.") - println(s"Must be a jar file path in URL format (e.g. 
hdfs://XX.jar, file://XX.jar)") + println(s"Must be a jar file path in URL format " + + "(e.g. hdfs://host:port/XX.jar, file:///XX.jar)") printUsageAndExit(-1) } @@ -114,5 +117,12 @@ private[spark] class ClientArguments(args: Array[String]) { } object ClientArguments { - def isValidJarUrl(s: String): Boolean = s.matches("(.+):(.+)jar") + def isValidJarUrl(s: String): Boolean = { + try { + val uri = new URI(s) + uri.getScheme != null && uri.getPath != null && uri.getPath.endsWith(".jar") + } catch { + case _: URISyntaxException => false + } + } } diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index b9dd8557ee904..243d8edb72ed3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -88,10 +88,14 @@ private[deploy] object DeployMessages { case class KillDriver(driverId: String) extends DeployMessage + case class ApplicationFinished(id: String) + // Worker internal case object WorkDirCleanup // Sent to Worker actor periodically for cleaning up app folders + case object ReregisterWithMaster // used when a worker attempts to reconnect to a master + // AppClient to Master case class RegisterApplication(appDescription: ApplicationDescription) @@ -173,4 +177,5 @@ private[deploy] object DeployMessages { // Liveness checks in various places case object SendHeartbeat + } diff --git a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala index 58c95dc4f9116..b056a19ce6598 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala @@ -25,5 +25,13 @@ private[spark] class DriverDescription( val command: Command) extends Serializable { + def copy( + jarUrl: String = jarUrl, + mem: Int = mem, + cores: Int = cores, + supervise: Boolean = supervise, + command: Command = command): DriverDescription = + new DriverDescription(jarUrl, mem, cores, supervise, command) + override def toString: String = s"DriverDescription (${command.mainClass})" } diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index af94b05ce3847..53e18c4bcec23 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -26,7 +26,7 @@ import org.apache.spark.api.python.PythonUtils import org.apache.spark.util.{RedirectThread, Utils} /** - * A main class used by spark-submit to launch Python applications. It executes python as a + * A main class used to launch Python applications. It executes python as a * subprocess and then has it connect back to the JVM to access system properties, etc. 
*/ object PythonRunner { @@ -87,8 +87,8 @@ object PythonRunner { // Strip the URI scheme from the path formattedPath = new URI(formattedPath).getScheme match { - case Utils.windowsDrive(d) if windows => formattedPath case null => formattedPath + case Utils.windowsDrive(d) if windows => formattedPath case _ => new URI(formattedPath).getPath } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index fe0ad9ebbca12..d68854214ef06 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -17,15 +17,20 @@ package org.apache.spark.deploy +import java.lang.reflect.Method import java.security.PrivilegedExceptionAction import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.FileSystem.Statistics import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{Logging, SparkContext, SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.util.Utils import scala.collection.JavaConversions._ @@ -121,6 +126,71 @@ class SparkHadoopUtil extends Logging { UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename) } + /** + * Returns a function that can be called to find Hadoop FileSystem bytes read. If + * getFSBytesReadOnThreadCallback is called from thread r at time t, the returned callback will + * return the bytes read on r since t. Reflection is required because thread-level FileSystem + * statistics are only available as of Hadoop 2.5 (see HADOOP-10688). + * Returns None if the required method can't be found. + */ + private[spark] def getFSBytesReadOnThreadCallback(): Option[() => Long] = { + try { + val threadStats = getFileSystemThreadStatistics() + val getBytesReadMethod = getFileSystemThreadStatisticsMethod("getBytesRead") + val f = () => threadStats.map(getBytesReadMethod.invoke(_).asInstanceOf[Long]).sum + val baselineBytesRead = f() + Some(() => f() - baselineBytesRead) + } catch { + case e @ (_: NoSuchMethodException | _: ClassNotFoundException) => { + logDebug("Couldn't find method for retrieving thread-level FileSystem input data", e) + None + } + } + } + + /** + * Returns a function that can be called to find Hadoop FileSystem bytes written. If + * getFSBytesWrittenOnThreadCallback is called from thread r at time t, the returned callback will + * return the bytes written on r since t. Reflection is required because thread-level FileSystem + * statistics are only available as of Hadoop 2.5 (see HADOOP-10688). + * Returns None if the required method can't be found. 
+ */ + private[spark] def getFSBytesWrittenOnThreadCallback(): Option[() => Long] = { + try { + val threadStats = getFileSystemThreadStatistics() + val getBytesWrittenMethod = getFileSystemThreadStatisticsMethod("getBytesWritten") + val f = () => threadStats.map(getBytesWrittenMethod.invoke(_).asInstanceOf[Long]).sum + val baselineBytesWritten = f() + Some(() => f() - baselineBytesWritten) + } catch { + case e @ (_: NoSuchMethodException | _: ClassNotFoundException) => { + logDebug("Couldn't find method for retrieving thread-level FileSystem output data", e) + None + } + } + } + + private def getFileSystemThreadStatistics(): Seq[AnyRef] = { + val stats = FileSystem.getAllStatistics() + stats.map(Utils.invoke(classOf[Statistics], _, "getThreadStatistics")) + } + + private def getFileSystemThreadStatisticsMethod(methodName: String): Method = { + val statisticsDataClass = + Class.forName("org.apache.hadoop.fs.FileSystem$Statistics$StatisticsData") + statisticsDataClass.getDeclaredMethod(methodName) + } + + /** + * Using reflection to get the Configuration from JobContext/TaskAttemptContext. If we directly + * call `JobContext/TaskAttemptContext.getConfiguration`, it will generate different byte codes + * for Hadoop 1.+ and Hadoop 2.+ because JobContext/TaskAttemptContext is class in Hadoop 1.+ + * while it's interface in Hadoop 2.+. + */ + def getConfigurationFromJobContext(context: JobContext): Configuration = { + val method = context.getClass.getMethod("getConfiguration") + method.invoke(context).asInstanceOf[Configuration] + } } object SparkHadoopUtil { diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index f97bf67fa5a3b..8bbfcd2997dc6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -23,6 +23,19 @@ import java.net.URL import scala.collection.mutable.{ArrayBuffer, HashMap, Map} +import org.apache.hadoop.fs.Path + +import org.apache.ivy.Ivy +import org.apache.ivy.core.LogOptions +import org.apache.ivy.core.module.descriptor.{DefaultExcludeRule, DefaultDependencyDescriptor, DefaultModuleDescriptor} +import org.apache.ivy.core.module.id.{ModuleId, ArtifactId, ModuleRevisionId} +import org.apache.ivy.core.report.ResolveReport +import org.apache.ivy.core.resolve.{IvyNode, ResolveOptions} +import org.apache.ivy.core.retrieve.RetrieveOptions +import org.apache.ivy.core.settings.IvySettings +import org.apache.ivy.plugins.matcher.GlobPatternMatcher +import org.apache.ivy.plugins.resolver.{ChainResolver, IBiblioResolver} + import org.apache.spark.executor.ExecutorURLClassLoader import org.apache.spark.util.Utils @@ -134,19 +147,38 @@ object SparkSubmit { } } + val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER + + // Require all python files to be local, so we can add them to the PYTHONPATH + // In YARN cluster mode, python files are distributed as regular files, which can be non-local + if (args.isPython && !isYarnCluster) { + if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { + printErrorAndExit(s"Only local python files are supported: $args.primaryResource") + } + val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",") + if (nonLocalPyFiles.nonEmpty) { + printErrorAndExit(s"Only local additional python files are supported: $nonLocalPyFiles") + } + } + // The following modes are not supported or applicable (clusterManager, deployMode) match { case (MESOS, CLUSTER) => 
printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.") - case (_, CLUSTER) if args.isPython => - printErrorAndExit("Cluster deploy mode is currently not supported for python applications.") + case (STANDALONE, CLUSTER) if args.isPython => + printErrorAndExit("Cluster deploy mode is currently not supported for python " + + "applications on standalone clusters.") case (_, CLUSTER) if isShell(args.primaryResource) => printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.") + case (_, CLUSTER) if isSqlShell(args.mainClass) => + printErrorAndExit("Cluster deploy mode is not applicable to Spark SQL shell.") + case (_, CLUSTER) if isThriftServer(args.mainClass) => + printErrorAndExit("Cluster deploy mode is not applicable to Spark Thrift server.") case _ => } // If we're running a python app, set the main class to our specific python runner - if (args.isPython) { + if (args.isPython && deployMode == CLIENT) { if (args.primaryResource == PYSPARK_SHELL) { args.mainClass = "py4j.GatewayServer" args.childArgs = ArrayBuffer("--die-on-broken-pipe", "0") @@ -158,13 +190,33 @@ object SparkSubmit { args.files = mergeFileLists(args.files, args.primaryResource) } args.files = mergeFileLists(args.files, args.pyFiles) - // Format python file paths properly before adding them to the PYTHONPATH - sysProps("spark.submit.pyFiles") = PythonRunner.formatPaths(args.pyFiles).mkString(",") + if (args.pyFiles != null) { + sysProps("spark.submit.pyFiles") = args.pyFiles + } + } + + // In yarn-cluster mode for a python app, add primary resource and pyFiles to files + // that can be distributed with the job + if (args.isPython && isYarnCluster) { + args.files = mergeFileLists(args.files, args.primaryResource) + args.files = mergeFileLists(args.files, args.pyFiles) } // Special flag to avoid deprecation warnings at the client sysProps("SPARK_SUBMIT") = "true" + // Resolve maven dependencies if there are any and add classpath to jars + val resolvedMavenCoordinates = + SparkSubmitUtils.resolveMavenCoordinates( + args.packages, Option(args.repositories), Option(args.ivyRepoPath)) + if (!resolvedMavenCoordinates.trim.isEmpty) { + if (args.jars == null || args.jars.trim.isEmpty) { + args.jars = resolvedMavenCoordinates + } else { + args.jars += s",$resolvedMavenCoordinates" + } + } + // A list of rules to map each argument to system properties or command-line options in // each deploy mode; we iterate through these below val options = List[OptionAssigner]( @@ -173,6 +225,7 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars"), + OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"), OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.driver.memory"), OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, @@ -184,6 +237,7 @@ object SparkSubmit { // Standalone cluster only OptionAssigner(args.jars, STANDALONE, CLUSTER, sysProp = "spark.jars"), + OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy"), OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), OptionAssigner(args.driverCores, STANDALONE, CLUSTER, clOption = "--cores"), @@ -197,6 +251,7 @@ object SparkSubmit { // Yarn cluster only OptionAssigner(args.name, YARN, 
CLUSTER, clOption = "--name"), OptionAssigner(args.driverMemory, YARN, CLUSTER, clOption = "--driver-memory"), + OptionAssigner(args.driverCores, YARN, CLUSTER, clOption = "--driver-cores"), OptionAssigner(args.queue, YARN, CLUSTER, clOption = "--queue"), OptionAssigner(args.numExecutors, YARN, CLUSTER, clOption = "--num-executors"), OptionAssigner(args.executorMemory, YARN, CLUSTER, clOption = "--executor-memory"), @@ -239,7 +294,6 @@ object SparkSubmit { // Add the application jar automatically so the user doesn't have to call sc.addJar // For YARN cluster mode, the jar is already distributed on each node as "app.jar" // For python files, the primary resource is already distributed as a regular file - val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER if (!isYarnCluster && !args.isPython) { var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty) if (isUserJar(args.primaryResource)) { @@ -264,24 +318,58 @@ object SparkSubmit { // In yarn-cluster mode, use yarn.Client as a wrapper around the user class if (isYarnCluster) { childMainClass = "org.apache.spark.deploy.yarn.Client" - if (args.primaryResource != SPARK_INTERNAL) { - childArgs += ("--jar", args.primaryResource) + if (args.isPython) { + val mainPyFile = new Path(args.primaryResource).getName + childArgs += ("--primary-py-file", mainPyFile) + if (args.pyFiles != null) { + // These files will be distributed to each machine's working directory, so strip the + // path prefix + val pyFilesNames = args.pyFiles.split(",").map(p => (new Path(p)).getName).mkString(",") + childArgs += ("--py-files", pyFilesNames) + } + childArgs += ("--class", "org.apache.spark.deploy.PythonRunner") + } else { + if (args.primaryResource != SPARK_INTERNAL) { + childArgs += ("--jar", args.primaryResource) + } + childArgs += ("--class", args.mainClass) } - childArgs += ("--class", args.mainClass) if (args.childArgs != null) { args.childArgs.foreach { arg => childArgs += ("--arg", arg) } } } - // Properties given with --conf are superceded by other options, but take precedence over - // properties in the defaults file. + // Load any properties specified through --conf and the default properties file for ((k, v) <- args.sparkProperties) { sysProps.getOrElseUpdate(k, v) } - // Read from default spark properties, if any - for ((k, v) <- args.defaultSparkProperties) { - sysProps.getOrElseUpdate(k, v) + // Ignore invalid spark.driver.host in cluster modes. + if (deployMode == CLUSTER) { + sysProps -= ("spark.driver.host") + } + + // Resolve paths in certain spark properties + val pathConfigs = Seq( + "spark.jars", + "spark.files", + "spark.yarn.jar", + "spark.yarn.dist.files", + "spark.yarn.dist.archives") + pathConfigs.foreach { config => + // Replace old URIs with resolved URIs, if they exist + sysProps.get(config).foreach { oldValue => + sysProps(config) = Utils.resolveURIs(oldValue) + } + } + + // Resolve and format python file paths properly before adding them to the PYTHONPATH. + // The resolving part is redundant in the case of --py-files, but necessary if the user + // explicitly sets `spark.submit.pyFiles` in his/her default properties file. 
+ sysProps.get("spark.submit.pyFiles").foreach { pyFiles => + val resolvedPyFiles = Utils.resolveURIs(pyFiles) + val formattedPyFiles = PythonRunner.formatPaths(resolvedPyFiles).mkString(",") + sysProps("spark.submit.pyFiles") = formattedPyFiles } (childArgs, childClasspath, sysProps, childMainClass) @@ -321,12 +409,17 @@ object SparkSubmit { case e: ClassNotFoundException => e.printStackTrace(printStream) if (childMainClass.contains("thriftserver")) { - println(s"Failed to load main class $childMainClass.") - println("You need to build Spark with -Phive.") + printStream.println(s"Failed to load main class $childMainClass.") + printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.") } System.exit(CLASS_NOT_FOUND_EXIT_STATUS) } + // SPARK-4170 + if (classOf[scala.App].isAssignableFrom(mainClass)) { + printWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.") + } + val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass) if (!Modifier.isStatic(mainMethod.getModifiers)) { throw new IllegalStateException("The main method in the given main class must be static") @@ -370,6 +463,20 @@ object SparkSubmit { primaryResource == SPARK_SHELL || primaryResource == PYSPARK_SHELL } + /** + * Return whether the given main class represents a sql shell. + */ + private[spark] def isSqlShell(mainClass: String): Boolean = { + mainClass == "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver" + } + + /** + * Return whether the given main class represents a thrift server. + */ + private[spark] def isThriftServer(mainClass: String): Boolean = { + mainClass == "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2" + } + /** * Return whether the given primary resource requires running python. */ @@ -393,6 +500,194 @@ object SparkSubmit { } } +/** Provides utility functions to be used inside SparkSubmit. */ +private[spark] object SparkSubmitUtils { + + // Exposed for testing + private[spark] var printStream = SparkSubmit.printStream + + /** + * Represents a Maven Coordinate + * @param groupId the groupId of the coordinate + * @param artifactId the artifactId of the coordinate + * @param version the version of the coordinate + */ + private[spark] case class MavenCoordinate(groupId: String, artifactId: String, version: String) + +/** + * Extracts maven coordinates from a comma-delimited string + * @param coordinates Comma-delimited string of maven coordinates + * @return Sequence of Maven coordinates + */ + private[spark] def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = { + coordinates.split(",").map { p => + val splits = p.split(":") + require(splits.length == 3, s"Provided Maven Coordinates must be in the form " + + s"'groupId:artifactId:version'. The coordinate provided is: $p") + require(splits(0) != null && splits(0).trim.nonEmpty, s"The groupId cannot be null or " + + s"be whitespace. The groupId provided is: ${splits(0)}") + require(splits(1) != null && splits(1).trim.nonEmpty, s"The artifactId cannot be null or " + + s"be whitespace. The artifactId provided is: ${splits(1)}") + require(splits(2) != null && splits(2).trim.nonEmpty, s"The version cannot be null or " + + s"be whitespace. The version provided is: ${splits(2)}") + new MavenCoordinate(splits(0), splits(1), splits(2)) + } + } + + /** + * Extracts maven coordinates from a comma-delimited string + * @param remoteRepos Comma-delimited string of remote repositories + * @return A ChainResolver used by Ivy to search for and resolve dependencies. 
+ */ + private[spark] def createRepoResolvers(remoteRepos: Option[String]): ChainResolver = { + // We need a chain resolver if we want to check multiple repositories + val cr = new ChainResolver + cr.setName("list") + + // the biblio resolver resolves POM declared dependencies + val br: IBiblioResolver = new IBiblioResolver + br.setM2compatible(true) + br.setUsepoms(true) + br.setName("central") + cr.add(br) + + val repositoryList = remoteRepos.getOrElse("") + // add any other remote repositories other than maven central + if (repositoryList.trim.nonEmpty) { + repositoryList.split(",").zipWithIndex.foreach { case (repo, i) => + val brr: IBiblioResolver = new IBiblioResolver + brr.setM2compatible(true) + brr.setUsepoms(true) + brr.setRoot(repo) + brr.setName(s"repo-${i + 1}") + cr.add(brr) + printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}") + } + } + cr + } + + /** + * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath + * (will append to jars in SparkSubmit). The name of the jar is given + * after a '!' by Ivy. It also sometimes contains '(bundle)' after '.jar'. Remove that as well. + * @param artifacts Sequence of dependencies that were resolved and retrieved + * @param cacheDirectory directory where jars are cached + * @return a comma-delimited list of paths for the dependencies + */ + private[spark] def resolveDependencyPaths( + artifacts: Array[AnyRef], + cacheDirectory: File): String = { + artifacts.map { artifactInfo => + val artifactString = artifactInfo.toString + val jarName = artifactString.drop(artifactString.lastIndexOf("!") + 1) + cacheDirectory.getAbsolutePath + File.separator + + jarName.substring(0, jarName.lastIndexOf(".jar") + 4) + }.mkString(",") + } + + /** Adds the given maven coordinates to Ivy's module descriptor. */ + private[spark] def addDependenciesToIvy( + md: DefaultModuleDescriptor, + artifacts: Seq[MavenCoordinate], + ivyConfName: String): Unit = { + artifacts.foreach { mvn => + val ri = ModuleRevisionId.newInstance(mvn.groupId, mvn.artifactId, mvn.version) + val dd = new DefaultDependencyDescriptor(ri, false, false) + dd.addDependencyConfiguration(ivyConfName, ivyConfName) + printStream.println(s"${dd.getDependencyId} added as a dependency") + md.addDependency(dd) + } + } + + /** A nice function to use in tests as well. Values are dummy strings. 
*/ + private[spark] def getModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( + ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) + + /** + * Resolves any dependencies that were supplied through maven coordinates + * @param coordinates Comma-delimited string of maven coordinates + * @param remoteRepos Comma-delimited string of remote repositories other than maven central + * @param ivyPath The path to the local ivy repository + * @return The comma-delimited path to the jars of the given maven artifacts including their + * transitive dependencies + */ + private[spark] def resolveMavenCoordinates( + coordinates: String, + remoteRepos: Option[String], + ivyPath: Option[String], + isTest: Boolean = false): String = { + if (coordinates == null || coordinates.trim.isEmpty) { + "" + } else { + val artifacts = extractMavenCoordinates(coordinates) + // Default configuration name for ivy + val ivyConfName = "default" + // set ivy settings for location of cache + val ivySettings: IvySettings = new IvySettings + // Directories for caching downloads through ivy and storing the jars when maven coordinates + // are supplied to spark-submit + val alternateIvyCache = ivyPath.getOrElse("") + val packagesDirectory: File = + if (alternateIvyCache.trim.isEmpty) { + new File(ivySettings.getDefaultIvyUserDir, "jars") + } else { + ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) + new File(alternateIvyCache, "jars") + } + printStream.println( + s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") + printStream.println(s"The jars for the packages stored in: $packagesDirectory") + // create a pattern matcher + ivySettings.addMatcher(new GlobPatternMatcher) + // create the dependency resolvers + val repoResolver = createRepoResolvers(remoteRepos) + ivySettings.addResolver(repoResolver) + ivySettings.setDefaultResolver(repoResolver.getName) + + val ivy = Ivy.newInstance(ivySettings) + // Set resolve options to download transitive dependencies as well + val resolveOptions = new ResolveOptions + resolveOptions.setTransitive(true) + val retrieveOptions = new RetrieveOptions + // Turn downloading and logging off for testing + if (isTest) { + resolveOptions.setDownload(false) + resolveOptions.setLog(LogOptions.LOG_QUIET) + retrieveOptions.setLog(LogOptions.LOG_QUIET) + } else { + resolveOptions.setDownload(true) + } + + // A Module descriptor must be specified. 
Entries are dummy strings + val md = getModuleDescriptor + md.setDefaultConf(ivyConfName) + + // Add an exclusion rule for Spark + val sparkArtifacts = new ArtifactId(new ModuleId("org.apache.spark", "*"), "*", "*", "*") + val sparkDependencyExcludeRule = + new DefaultExcludeRule(sparkArtifacts, ivySettings.getMatcher("glob"), null) + sparkDependencyExcludeRule.addConfiguration(ivyConfName) + + // Exclude any Spark dependencies, and add all supplied maven artifacts as dependencies + md.addExcludeRule(sparkDependencyExcludeRule) + addDependenciesToIvy(md, artifacts, ivyConfName) + + // resolve dependencies + val rr: ResolveReport = ivy.resolve(md, resolveOptions) + if (rr.hasError) { + throw new RuntimeException(rr.getAllProblemMessages.toString) + } + // retrieve all resolved dependencies + ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId, + packagesDirectory.getAbsolutePath + File.separator + "[artifact](-[classifier]).[ext]", + retrieveOptions.setConfs(Array(ivyConfName))) + + resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) + } + } +} + /** * Provides an indirection layer for passing arguments as system properties or flags to * the user's driver program or to downstream launcher tools. diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 72a452e0aefb5..5cadc534f4baa 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -17,9 +17,9 @@ package org.apache.spark.deploy +import java.net.URI import java.util.jar.JarFile -import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.spark.util.Utils @@ -50,6 +50,9 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St var name: String = null var childArgs: ArrayBuffer[String] = new ArrayBuffer[String]() var jars: String = null + var packages: String = null + var repositories: String = null + var ivyRepoPath: String = null var verbose: Boolean = false var isPython: Boolean = false var pyFiles: String = null @@ -72,57 +75,92 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St defaultProperties } - // Respect SPARK_*_MEMORY for cluster mode - driverMemory = sys.env.get("SPARK_DRIVER_MEMORY").orNull - executorMemory = sys.env.get("SPARK_EXECUTOR_MEMORY").orNull - + // Set parameters from command line arguments parseOpts(args.toList) - mergeSparkProperties() + // Populate `sparkProperties` map from properties file + mergeDefaultSparkProperties() + // Use `sparkProperties` map along with env vars to fill in any missing parameters + loadEnvironmentArguments() + checkRequiredArguments() /** - * Fill in any undefined values based on the default properties file or options passed in through - * the '--conf' flag. + * Merge values from the default properties file with those specified through --conf. + * When this is called, `sparkProperties` is already filled with configs from the latter. 
*/ - private def mergeSparkProperties(): Unit = { + private def mergeDefaultSparkProperties(): Unit = { // Use common defaults file, if not specified by user propertiesFile = Option(propertiesFile).getOrElse(Utils.getDefaultPropertiesFile(env)) + // Honor --conf before the defaults file + defaultSparkProperties.foreach { case (k, v) => + if (!sparkProperties.contains(k)) { + sparkProperties(k) = v + } + } + } - val properties = HashMap[String, String]() - properties.putAll(defaultSparkProperties) - properties.putAll(sparkProperties) - - // Use properties file as fallback for values which have a direct analog to - // arguments in this script. - master = Option(master).orElse(properties.get("spark.master")).orNull - executorMemory = Option(executorMemory).orElse(properties.get("spark.executor.memory")).orNull - executorCores = Option(executorCores).orElse(properties.get("spark.executor.cores")).orNull + /** + * Load arguments from environment variables, Spark properties etc. + */ + private def loadEnvironmentArguments(): Unit = { + master = Option(master) + .orElse(sparkProperties.get("spark.master")) + .orElse(env.get("MASTER")) + .orNull + driverMemory = Option(driverMemory) + .orElse(sparkProperties.get("spark.driver.memory")) + .orElse(env.get("SPARK_DRIVER_MEMORY")) + .orNull + driverCores = Option(driverCores) + .orElse(sparkProperties.get("spark.driver.cores")) + .orNull + executorMemory = Option(executorMemory) + .orElse(sparkProperties.get("spark.executor.memory")) + .orElse(env.get("SPARK_EXECUTOR_MEMORY")) + .orNull + executorCores = Option(executorCores) + .orElse(sparkProperties.get("spark.executor.cores")) + .orNull totalExecutorCores = Option(totalExecutorCores) - .orElse(properties.get("spark.cores.max")) + .orElse(sparkProperties.get("spark.cores.max")) .orNull - name = Option(name).orElse(properties.get("spark.app.name")).orNull - jars = Option(jars).orElse(properties.get("spark.jars")).orNull - - // This supports env vars in older versions of Spark - master = Option(master).orElse(env.get("MASTER")).orNull + name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull + jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull + ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull + numExecutors = Option(numExecutors) + .getOrElse(sparkProperties.get("spark.executor.instances").orNull) // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && primaryResource != null) { - try { - val jar = new JarFile(primaryResource) - // Note that this might still return null if no main-class is set; we catch that later - mainClass = jar.getManifest.getMainAttributes.getValue("Main-Class") - } catch { - case e: Exception => - SparkSubmit.printErrorAndExit("Cannot load main class from JAR: " + primaryResource) - return + val uri = new URI(primaryResource) + val uriScheme = uri.getScheme() + + uriScheme match { + case "file" => + try { + val jar = new JarFile(uri.getPath) + // Note that this might still return null if no main-class is set; we catch that later + mainClass = jar.getManifest.getMainAttributes.getValue("Main-Class") + } catch { + case e: Exception => + SparkSubmit.printErrorAndExit(s"Cannot load main class from JAR $primaryResource") + } + case _ => + SparkSubmit.printErrorAndExit( + s"Cannot load main class from JAR $primaryResource with URI $uriScheme. " + + "Please specify a class through --class.") } } // Global defaults. 
These should be keep to minimum to avoid confusing behavior. master = Option(master).getOrElse("local[*]") + // In YARN mode, app name can be set via SPARK_YARN_APP_NAME (see SPARK-5222) + if (master.startsWith("yarn")) { + name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")).orNull + } + // Set name from main class if not given name = Option(name).orElse(Option(mainClass)).orNull if (name == null && primaryResource != null) { @@ -131,7 +169,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St } /** Ensure that required fields exists. Call this only once all defaults are loaded. */ - private def checkRequiredArguments() = { + private def checkRequiredArguments(): Unit = { if (args.length == 0) { printUsageAndExit(-1) } @@ -145,18 +183,6 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St SparkSubmit.printErrorAndExit("--py-files given but primary resource is not a Python script") } - // Require all python files to be local, so we can add them to the PYTHONPATH - if (isPython) { - if (Utils.nonLocalPaths(primaryResource).nonEmpty) { - SparkSubmit.printErrorAndExit(s"Only local python files are supported: $primaryResource") - } - val nonLocalPyFiles = Utils.nonLocalPaths(pyFiles).mkString(",") - if (nonLocalPyFiles.nonEmpty) { - SparkSubmit.printErrorAndExit( - s"Only local additional python files are supported: $nonLocalPyFiles") - } - } - if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") if (!hasHadoopEnv && !Utils.isTesting) { @@ -166,7 +192,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St } } - override def toString = { + override def toString = { s"""Parsed arguments: | master $master | deployMode $deployMode @@ -174,7 +200,6 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St | executorCores $executorCores | totalExecutorCores $totalExecutorCores | propertiesFile $propertiesFile - | extraSparkProperties $sparkProperties | driverMemory $driverMemory | driverCores $driverCores | driverExtraClassPath $driverExtraClassPath @@ -191,14 +216,20 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St | name $name | childArgs [${childArgs.mkString(" ")}] | jars $jars + | packages $packages + | repositories $repositories | verbose $verbose | - |Default properties from $propertiesFile: - |${defaultSparkProperties.mkString(" ", "\n ", "\n")} + |Spark properties used, including those specified through + | --conf and those from the properties file $propertiesFile: + |${sparkProperties.mkString(" ", "\n ", "\n")} """.stripMargin } - /** Fill in values by parsing user options. */ + /** + * Fill in values by parsing user options. + * NOTE: Any changes here must be reflected in YarnClientSchedulerBackend. 
+ */ private def parseOpts(opts: Seq[String]): Unit = { val EQ_SEPARATED_OPT="""(--[^=]+)=(.+)""".r @@ -293,6 +324,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St jars = Utils.resolveURIs(value) parse(tail) + case ("--packages") :: value :: tail => + packages = value + parse(tail) + + case ("--repositories") :: value :: tail => + repositories = value + parse(tail) + case ("--conf" | "-c") :: value :: tail => value.split("=", 2).toSeq match { case Seq(k, v) => sparkProperties(k) = v @@ -327,7 +366,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St } } - private def printUsageAndExit(exitCode: Int, unknownParam: Any = null) { + private def printUsageAndExit(exitCode: Int, unknownParam: Any = null): Unit = { val outStream = SparkSubmit.printStream if (unknownParam != null) { outStream.println("Unknown/unsupported param " + unknownParam) @@ -343,6 +382,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St | --name NAME A name of your application. | --jars JARS Comma-separated list of local jars to include on the driver | and executor classpaths. + | --packages Comma-separated list of maven coordinates of jars to include + | on the driver and executor classpaths. Will search the local + | maven repo, then maven central and any additional remote + | repositories given by --repositories. The format for the + | coordinates should be groupId:artifactId:version. + | --repositories Comma-separated list of additional remote repositories to + | search for the maven coordinates given with --packages. | --py-files PY_FILES Comma-separated list of .zip, .egg, or .py files to place | on the PYTHONPATH for Python apps. | --files FILES Comma-separated list of files to be placed in the working @@ -372,11 +418,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St | --total-executor-cores NUM Total cores for all executors. | | YARN-only: + | --driver-cores NUM Number of cores used by the driver, only in cluster mode + | (Default: 1). | --executor-cores NUM Number of cores per executor (Default: 1). | --queue QUEUE_NAME The YARN queue to submit to (Default: "default"). | --num-executors NUM Number of executors to launch (Default: 2). | --archives ARCHIVES Comma separated list of archives to be extracted into the - | working directory of each executor.""".stripMargin + | working directory of each executor. 
+ """.stripMargin ) SparkSubmit.exitFn() } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 0125330589da5..2eab9981845e8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -82,17 +82,8 @@ private[spark] object SparkSubmitDriverBootstrapper { .orElse(confDriverMemory) .getOrElse(defaultDriverMemory) - val newLibraryPath = - if (submitLibraryPath.isDefined) { - // SPARK_SUBMIT_LIBRARY_PATH is already captured in JAVA_OPTS - "" - } else { - confLibraryPath.map("-Djava.library.path=" + _).getOrElse("") - } - val newClasspath = if (submitClasspath.isDefined) { - // SPARK_SUBMIT_CLASSPATH is already captured in CLASSPATH classpath } else { classpath + confClasspath.map(sys.props("path.separator") + _).getOrElse("") @@ -114,7 +105,6 @@ private[spark] object SparkSubmitDriverBootstrapper { val command: Seq[String] = Seq(runner) ++ Seq("-cp", newClasspath) ++ - Seq(newLibraryPath) ++ filteredJavaOpts ++ Seq(s"-Xms$newDriverMemory", s"-Xmx$newDriverMemory") ++ Seq("org.apache.spark.deploy.SparkSubmit") ++ @@ -130,8 +120,25 @@ private[spark] object SparkSubmitDriverBootstrapper { // Start the driver JVM val filteredCommand = command.filter(_.nonEmpty) val builder = new ProcessBuilder(filteredCommand) + val env = builder.environment() + + if (submitLibraryPath.isEmpty && confLibraryPath.nonEmpty) { + val libraryPaths = confLibraryPath ++ sys.env.get(Utils.libraryPathEnvName) + env.put(Utils.libraryPathEnvName, libraryPaths.mkString(sys.props("path.separator"))) + } + val process = builder.start() + // If we kill an app while it's running, its sub-process should be killed too. + Runtime.getRuntime().addShutdownHook(new Thread() { + override def run() = { + if (process != null) { + process.destroy() + process.waitFor() + } + } + }) + // Redirect stdout and stderr from the child JVM val stdoutThread = new RedirectThread(process.getInputStream, System.out, "redirect stdout") val stderrThread = new RedirectThread(process.getErrorStream, System.err, "redirect stderr") @@ -142,14 +149,16 @@ private[spark] object SparkSubmitDriverBootstrapper { // subprocess there already reads directly from our stdin, so we should avoid spawning a // thread that contends with the subprocess in reading from System.in. val isWindows = Utils.isWindows - val isPySparkShell = sys.env.contains("PYSPARK_SHELL") + val isSubprocess = sys.env.contains("IS_SUBPROCESS") if (!isWindows) { - val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin") + val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin", + propagateEof = true) stdinThread.start() - // For the PySpark shell, Spark submit itself runs as a python subprocess, and so this JVM - // should terminate on broken pipe, which signals that the parent process has exited. In - // Windows, the termination logic for the PySpark shell is handled in java_gateway.py - if (isPySparkShell) { + // Spark submit (JVM) may run as a subprocess, and so this JVM should terminate on + // broken pipe, signaling that the parent process has exited. This is the case if the + // application is launched directly from python, as in the PySpark shell. 
In Windows, + // the termination logic is handled in java_gateway.py + if (isSubprocess) { stdinThread.join() process.destroy() } diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index 98a93d1fcb2a3..ffe940fbda2fb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -26,7 +26,7 @@ import akka.actor._ import akka.pattern.ask import akka.remote.{AssociationErrorEvent, DisassociatedEvent, RemotingLifecycleEvent} -import org.apache.spark.{Logging, SparkConf, SparkException} +import org.apache.spark.{Logging, SparkConf} import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.Master @@ -47,6 +47,8 @@ private[spark] class AppClient( conf: SparkConf) extends Logging { + val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_, AkkaUtils.protocol(actorSystem))) + val REGISTRATION_TIMEOUT = 20.seconds val REGISTRATION_RETRIES = 3 @@ -75,9 +77,9 @@ private[spark] class AppClient( } def tryRegisterAllMasters() { - for (masterUrl <- masterUrls) { - logInfo("Connecting to master " + masterUrl + "...") - val actor = context.actorSelection(Master.toAkkaUrl(masterUrl)) + for (masterAkkaUrl <- masterAkkaUrls) { + logInfo("Connecting to master " + masterAkkaUrl + "...") + val actor = context.actorSelection(masterAkkaUrl) actor ! RegisterApplication(appDescription) } } @@ -103,20 +105,15 @@ private[spark] class AppClient( } def changeMaster(url: String) { + // activeMasterUrl is a valid Spark url since we receive it from master. activeMasterUrl = url - master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl)) - masterAddress = activeMasterUrl match { - case Master.sparkUrlRegex(host, port) => - Address("akka.tcp", Master.systemName, host, port.toInt) - case x => - throw new SparkException("Invalid spark URL: " + x) - } + master = context.actorSelection( + Master.toAkkaUrl(activeMasterUrl, AkkaUtils.protocol(actorSystem))) + masterAddress = Master.toAkkaAddress(activeMasterUrl, AkkaUtils.protocol(actorSystem)) } private def isPossibleMaster(remoteUrl: Address) = { - masterUrls.map(s => Master.toAkkaUrl(s)) - .map(u => AddressFromURIString(u).hostPort) - .contains(remoteUrl.hostPort) + masterAkkaUrls.map(AddressFromURIString(_).hostPort).contains(remoteUrl.hostPort) } override def receiveWithLogging = { @@ -134,6 +131,7 @@ private[spark] class AppClient( val fullId = appId + "/" + id logInfo("Executor added: %s on %s (%s) with %d cores".format(fullId, workerId, hostPort, cores)) + master ! 
ExecutorStateChanged(appId, id, ExecutorState.RUNNING, None, None) listener.executorAdded(fullId, workerId, hostPort, cores, memory) case ExecutorUpdated(id, state, message, exitStatus) => diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index fbe39b27649f6..553bf3cb945ab 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -25,7 +25,8 @@ private[spark] case class ApplicationHistoryInfo( startTime: Long, endTime: Long, lastUpdated: Long, - sparkUser: String) + sparkUser: String, + completed: Boolean = false) private[spark] abstract class ApplicationHistoryProvider { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 481f6c93c6a8d..0ae45f4ad9130 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -17,34 +17,41 @@ package org.apache.spark.deploy.history -import java.io.FileNotFoundException +import java.io.{BufferedInputStream, FileNotFoundException, InputStream} import scala.collection.mutable import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.permission.AccessControlException import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.Utils +/** + * A class that provides application history from event logs stored in the file system. + * This provider checks for new finished applications in the background periodically and + * renders the history application UI by parsing the associated event logs. + */ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHistoryProvider with Logging { + import FsHistoryProvider._ + private val NOT_STARTED = "" // Interval between each check for event log updates private val UPDATE_INTERVAL_MS = conf.getInt("spark.history.fs.updateInterval", conf.getInt("spark.history.updateInterval", 10)) * 1000 - private val logDir = conf.get("spark.history.fs.logDirectory", null) - private val resolvedLogDir = Option(logDir) - .map { d => Utils.resolveURI(d) } - .getOrElse { throw new IllegalArgumentException("Logging directory must be specified.") } + private val logDir = conf.getOption("spark.history.fs.logDirectory") + .map { d => Utils.resolveURI(d).toString } + .getOrElse(DEFAULT_LOG_DIR) - private val fs = Utils.getHadoopFileSystem(resolvedLogDir, - SparkHadoopUtil.get.newConfiguration(conf)) + private val fs = Utils.getHadoopFileSystem(logDir, SparkHadoopUtil.get.newConfiguration(conf)) // A timestamp of when the disk was last accessed to check for log updates private var lastLogCheckTimeMs = -1L @@ -59,6 +66,12 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis @volatile private var applications: mutable.LinkedHashMap[String, FsApplicationHistoryInfo] = new mutable.LinkedHashMap() + // Constants used to parse Spark 1.0.0 log directories. 
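In that Spark 1.0.x layout, an application's event log is a directory whose file names carry the metadata. Roughly, and with a purely illustrative directory and codec name:

    // Hypothetical legacy directory:
    //   /tmp/spark-events/app-20141201120000-0001/
    //     EVENT_LOG_1  SPARK_VERSION_1.0.0  COMPRESSION_CODEC_lzf  APPLICATION_COMPLETE
    // The metadata can be recovered from the file names alone:
    import java.io.File

    def legacyMetadata(dir: File): (Option[String], Option[String], Boolean) = {
      val names = Option(dir.list()).getOrElse(Array.empty[String]).toSeq
      val version = names.find(_.startsWith("SPARK_VERSION_"))
        .map(_.stripPrefix("SPARK_VERSION_"))
      val codec = names.find(_.startsWith("COMPRESSION_CODEC_"))
        .map(_.stripPrefix("COMPRESSION_CODEC_"))
      val complete = names.contains("APPLICATION_COMPLETE")
      (version, codec, complete)
    }

The openLegacyEventLog method later in this file does the same prefix matching and then wraps the EVENT_LOG_ file in the resolved compression codec before handing it to the replay bus.
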
+ private[history] val LOG_PREFIX = "EVENT_LOG_" + private[history] val SPARK_VERSION_PREFIX = "SPARK_VERSION_" + private[history] val COMPRESSION_CODEC_PREFIX = "COMPRESSION_CODEC_" + private[history] val APPLICATION_COMPLETE = "APPLICATION_COMPLETE" + /** * A background thread that periodically checks for event log updates on disk. * @@ -85,21 +98,28 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis initialize() - private def initialize() { + private def initialize(): Unit = { // Validate the log directory. - val path = new Path(resolvedLogDir) + val path = new Path(logDir) if (!fs.exists(path)) { - throw new IllegalArgumentException( - "Logging directory specified does not exist: %s".format(resolvedLogDir)) + var msg = s"Log directory specified does not exist: $logDir." + if (logDir == DEFAULT_LOG_DIR) { + msg += " Did you configure the correct one through spark.fs.history.logDirectory?" + } + throw new IllegalArgumentException(msg) } if (!fs.getFileStatus(path).isDir) { throw new IllegalArgumentException( - "Logging directory specified is not a directory: %s".format(resolvedLogDir)) + "Logging directory specified is not a directory: %s".format(logDir)) } checkForLogs() - logCheckingThread.setDaemon(true) - logCheckingThread.start() + + // Disable the background thread during tests. + if (!conf.contains("spark.testing")) { + logCheckingThread.setDaemon(true) + logCheckingThread.start() + } } override def getListing() = applications.values @@ -107,25 +127,26 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis override def getAppUI(appId: String): Option[SparkUI] = { try { applications.get(appId).map { info => - val (replayBus, appListener) = createReplayBus(fs.getFileStatus( - new Path(logDir, info.logDir))) + val replayBus = new ReplayListenerBus() val ui = { val conf = this.conf.clone() val appSecManager = new SecurityManager(conf) - new SparkUI(conf, appSecManager, replayBus, appId, + SparkUI.createHistoryUI(conf, replayBus, appSecManager, appId, s"${HistoryServer.UI_PATH_PREFIX}/$appId") // Do not call ui.bind() to avoid creating a new server for each application } - replayBus.replay() + val appListener = new ApplicationEventListener() + replayBus.addListener(appListener) + val appInfo = replay(fs.getFileStatus(new Path(logDir, info.logPath)), replayBus) - ui.setAppName(s"${appListener.appName.getOrElse(NOT_STARTED)} ($appId)") + ui.setAppName(s"${appInfo.name} ($appId)") val uiAclsEnabled = conf.getBoolean("spark.history.ui.acls.enable", false) ui.getSecurityManager.setAcls(uiAclsEnabled) // make sure to set admin acls before view acls so they are properly picked up ui.getSecurityManager.setAdminAcls(appListener.adminAcls.getOrElse("")) - ui.getSecurityManager.setViewAcls(appListener.sparkUser.getOrElse(NOT_STARTED), + ui.getSecurityManager.setViewAcls(appInfo.sparkUser, appListener.viewAcls.getOrElse("")) ui } @@ -134,53 +155,45 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis } } - override def getConfig(): Map[String, String] = - Map("Event Log Location" -> resolvedLogDir.toString) + override def getConfig(): Map[String, String] = Map("Event log directory" -> logDir.toString) /** * Builds the application list based on the current contents of the log directory. * Tries to reuse as much of the data already in memory as possible, by not reading * applications that haven't been updated since last time the logs were checked. 
*/ - private def checkForLogs() = { + private[history] def checkForLogs(): Unit = { lastLogCheckTimeMs = getMonotonicTimeMs() logDebug("Checking for logs. Time is now %d.".format(lastLogCheckTimeMs)) - try { - val logStatus = fs.listStatus(new Path(resolvedLogDir)) - val logDirs = if (logStatus != null) logStatus.filter(_.isDir).toSeq else Seq[FileStatus]() - // Load all new logs from the log directory. Only directories that have a modification time - // later than the last known log directory will be loaded. + try { var newLastModifiedTime = lastModifiedTime - val logInfos = logDirs - .filter { dir => - if (fs.isFile(new Path(dir.getPath(), EventLoggingListener.APPLICATION_COMPLETE))) { - val modTime = getModificationTime(dir) + val statusList = Option(fs.listStatus(new Path(logDir))).map(_.toSeq) + .getOrElse(Seq[FileStatus]()) + val logInfos = statusList + .filter { entry => + try { + val modTime = getModificationTime(entry) newLastModifiedTime = math.max(newLastModifiedTime, modTime) - modTime > lastModifiedTime - } else { - false + modTime >= lastModifiedTime + } catch { + case e: AccessControlException => + // Do not use "logInfo" since these messages can get pretty noisy if printed on + // every poll. + logDebug(s"No permission to read $entry, ignoring.") + false } } - .flatMap { dir => + .flatMap { entry => try { - val (replayBus, appListener) = createReplayBus(dir) - replayBus.replay() - Some(new FsApplicationHistoryInfo( - dir.getPath().getName(), - appListener.appId.getOrElse(dir.getPath().getName()), - appListener.appName.getOrElse(NOT_STARTED), - appListener.startTime.getOrElse(-1L), - appListener.endTime.getOrElse(-1L), - getModificationTime(dir), - appListener.sparkUser.getOrElse(NOT_STARTED))) + Some(replay(entry, new ReplayListenerBus())) } catch { case e: Exception => - logInfo(s"Failed to load application log data from $dir.", e) + logError(s"Failed to load application log data from $entry.", e) None } } - .sortBy { info => -info.endTime } + .sortBy { info => (-info.endTime, -info.startTime) } lastModifiedTime = newLastModifiedTime @@ -190,7 +203,9 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis if (!logInfos.isEmpty) { val newApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]() def addIfAbsent(info: FsApplicationHistoryInfo) = { - if (!newApps.contains(info.id)) { + if (!newApps.contains(info.id) || + newApps(info.id).logPath.endsWith(EventLoggingListener.IN_PROGRESS) && + !info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) { newApps += (info.id -> info) } } @@ -210,46 +225,126 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis applications = newApps } } catch { - case t: Throwable => logError("Exception in checking for event log updates", t) + case e: Exception => logError("Exception in checking for event log updates", e) } } - private def createReplayBus(logDir: FileStatus): (ReplayListenerBus, ApplicationEventListener) = { - val path = logDir.getPath() - val elogInfo = EventLoggingListener.parseLoggingInfo(path, fs) - val replayBus = new ReplayListenerBus(elogInfo.logPaths, fs, elogInfo.compressionCodec) - val appListener = new ApplicationEventListener - replayBus.addListener(appListener) - (replayBus, appListener) + /** + * Replays the events in the specified log file and returns information about the associated + * application. 
+ */ + private def replay(eventLog: FileStatus, bus: ReplayListenerBus): FsApplicationHistoryInfo = { + val logPath = eventLog.getPath() + val (logInput, sparkVersion) = + if (isLegacyLogDirectory(eventLog)) { + openLegacyEventLog(logPath) + } else { + EventLoggingListener.openEventLog(logPath, fs) + } + try { + val appListener = new ApplicationEventListener + bus.addListener(appListener) + bus.replay(logInput, sparkVersion) + new FsApplicationHistoryInfo( + logPath.getName(), + appListener.appId.getOrElse(logPath.getName()), + appListener.appName.getOrElse(NOT_STARTED), + appListener.startTime.getOrElse(-1L), + appListener.endTime.getOrElse(-1L), + getModificationTime(eventLog), + appListener.sparkUser.getOrElse(NOT_STARTED), + isApplicationCompleted(eventLog)) + } finally { + logInput.close() + } } - /** Return when this directory was last modified. */ - private def getModificationTime(dir: FileStatus): Long = { - try { - val logFiles = fs.listStatus(dir.getPath) - if (logFiles != null && !logFiles.isEmpty) { - logFiles.map(_.getModificationTime).max - } else { - dir.getModificationTime + /** + * Loads a legacy log directory. This assumes that the log directory contains a single event + * log file (along with other metadata files), which is the case for directories generated by + * the code in previous releases. + * + * @return 2-tuple of (input stream of the events, version of Spark which wrote the log) + */ + private[history] def openLegacyEventLog(dir: Path): (InputStream, String) = { + val children = fs.listStatus(dir) + var eventLogPath: Path = null + var codecName: Option[String] = None + var sparkVersion: String = null + + children.foreach { child => + child.getPath().getName() match { + case name if name.startsWith(LOG_PREFIX) => + eventLogPath = child.getPath() + + case codec if codec.startsWith(COMPRESSION_CODEC_PREFIX) => + codecName = Some(codec.substring(COMPRESSION_CODEC_PREFIX.length())) + + case version if version.startsWith(SPARK_VERSION_PREFIX) => + sparkVersion = version.substring(SPARK_VERSION_PREFIX.length()) + + case _ => } - } catch { - case t: Throwable => - logError("Exception in accessing modification time of %s".format(dir.getPath), t) - -1L + } + + if (eventLogPath == null || sparkVersion == null) { + throw new IllegalArgumentException(s"$dir is not a Spark application log directory.") + } + + val codec = try { + codecName.map { c => CompressionCodec.createCodec(conf, c) } + } catch { + case e: Exception => + throw new IllegalArgumentException(s"Unknown compression codec $codecName.") + } + + val in = new BufferedInputStream(fs.open(eventLogPath)) + (codec.map(_.compressedInputStream(in)).getOrElse(in), sparkVersion) + } + + /** + * Return whether the specified event log path contains a old directory-based event log. + * Previously, the event log of an application comprises of multiple files in a directory. + * As of Spark 1.3, these files are consolidated into a single one that replaces the directory. + * See SPARK-2261 for more detail. + */ + private def isLegacyLogDirectory(entry: FileStatus): Boolean = entry.isDir() + + private def getModificationTime(fsEntry: FileStatus): Long = { + if (fsEntry.isDir) { + fs.listStatus(fsEntry.getPath).map(_.getModificationTime()).max + } else { + fsEntry.getModificationTime() } } /** Returns the system's mononotically increasing time. 
*/ - private def getMonotonicTimeMs() = System.nanoTime() / (1000 * 1000) + private def getMonotonicTimeMs(): Long = System.nanoTime() / (1000 * 1000) + + /** + * Return true when the application has completed. + */ + private def isApplicationCompleted(entry: FileStatus): Boolean = { + if (isLegacyLogDirectory(entry)) { + fs.exists(new Path(entry.getPath(), APPLICATION_COMPLETE)) + } else { + !entry.getPath().getName().endsWith(EventLoggingListener.IN_PROGRESS) + } + } + +} +private object FsHistoryProvider { + val DEFAULT_LOG_DIR = "file:/tmp/spark-events" } private class FsApplicationHistoryInfo( - val logDir: String, + val logPath: String, id: String, name: String, startTime: Long, endTime: Long, lastUpdated: Long, - sparkUser: String) - extends ApplicationHistoryInfo(id, name, startTime, endTime, lastUpdated, sparkUser) + sparkUser: String, + completed: Boolean = true) + extends ApplicationHistoryInfo(id, name, startTime, endTime, lastUpdated, sparkUser, completed) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index d25c29113d6da..e4e7bc2216014 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -26,12 +26,15 @@ import org.apache.spark.ui.{WebUIPage, UIUtils} private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { private val pageSize = 20 + private val plusOrMinus = 2 def render(request: HttpServletRequest): Seq[Node] = { val requestedPage = Option(request.getParameter("page")).getOrElse("1").toInt val requestedFirst = (requestedPage - 1) * pageSize + val requestedIncomplete = + Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean - val allApps = parent.getApplicationList() + val allApps = parent.getApplicationList().filter(_.completed != requestedIncomplete) val actualFirst = if (requestedFirst < allApps.size) requestedFirst else 0 val apps = allApps.slice(actualFirst, Math.min(actualFirst + pageSize, allApps.size)) @@ -39,6 +42,9 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { val last = Math.min(actualFirst + pageSize, allApps.size) - 1 val pageCount = allApps.size / pageSize + (if (allApps.size % pageSize > 0) 1 else 0) + val secondPageFromLeft = 2 + val secondPageFromRight = pageCount - 1 + val appTable = UIUtils.listingTable(appHeader, appRow, apps) val providerConfig = parent.getProviderConfig() val content = @@ -48,19 +54,60 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { {providerConfig.map { case (k, v) =>

             <li><strong>{k}:</strong> {v}</li> }}
             {
+              // This displays the indices of pages that are within `plusOrMinus` pages of
+              // the current page. Regardless of where the current page is, this also links
+              // to the first and last page. If the current page +/- `plusOrMinus` is greater
+              // than the 2nd page from the first page or less than the 2nd page from the last
+              // page, `...` will be displayed.
               if (allApps.size > 0) {
+                val leftSideIndices =
+                  rangeIndices(actualPage - plusOrMinus until actualPage, 1 < _)
+                val rightSideIndices =
+                  rangeIndices(actualPage + 1 to actualPage + plusOrMinus, _ < pageCount)
+
                 <h4>
                   Showing {actualFirst + 1}-{last + 1} of {allApps.size}
+                  {if (requestedIncomplete) "(Incomplete applications)"}
-                  {if (actualPage > 1) <a href={"/?page=" + (actualPage - 1)}>&lt;</a>}
-                  {if (actualPage < pageCount) <a href={"/?page=" + (actualPage + 1)}>&gt;</a>}
+                  {
+                    if (actualPage > 1) {
+                      <a href={makePageLink(actualPage - 1, requestedIncomplete)}>&lt;</a>
+                      <a href={makePageLink(1, requestedIncomplete)}>1</a>
+                    }
+                  }
+                  {if (actualPage - plusOrMinus > secondPageFromLeft) " ... "}
+                  {leftSideIndices}
+                  {actualPage}
+                  {rightSideIndices}
+                  {if (actualPage + plusOrMinus < secondPageFromRight) " ... "}
+                  {
+                    if (actualPage < pageCount) {
+                      <a href={makePageLink(pageCount, requestedIncomplete)}>{pageCount}</a>
+                      <a href={makePageLink(actualPage + 1, requestedIncomplete)}>&gt;</a>
+                    }
+                  }
                 </h4> ++
                 appTable
               } else {
-                <h4>No Completed Applications Found</h4>
+                <h4>No completed applications found!</h4> ++
+                <p>Did you specify the correct logging directory?
+                  Please verify your setting of
+                  spark.history.fs.logDirectory and whether you have the permissions to
+                  access it.<br/> It is also possible that your application did not run to
+                  completion or did not stop the SparkContext.
+                </p>
    } } + + { + if (requestedIncomplete) { + "Back to completed applications" + } else { + "Show incomplete applications" + } + } + UIUtils.basicSparkPage(content, "History Server") @@ -75,20 +122,32 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { "Spark User", "Last Updated") + private def rangeIndices(range: Seq[Int], condition: Int => Boolean): Seq[Node] = { + range.filter(condition).map(nextPage => {nextPage} ) + } + private def appRow(info: ApplicationHistoryInfo): Seq[Node] = { val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}" val startTime = UIUtils.formatDate(info.startTime) - val endTime = UIUtils.formatDate(info.endTime) - val duration = UIUtils.formatDuration(info.endTime - info.startTime) + val endTime = if (info.endTime > 0) UIUtils.formatDate(info.endTime) else "-" + val duration = + if (info.endTime > 0) UIUtils.formatDuration(info.endTime - info.startTime) else "-" val lastUpdated = UIUtils.formatDate(info.lastUpdated) {info.id} {info.name} - {startTime} - {endTime} - {duration} + {startTime} + {endTime} + {duration} {info.sparkUser} - {lastUpdated} + {lastUpdated} } + + private def makePageLink(linkPage: Int, showIncomplete: Boolean): String = { + "/?" + Array( + "page=" + linkPage, + "showIncomplete=" + showIncomplete + ).mkString("&") + } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index ce00c0ffd21e0..fa9bfe5426b6c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -158,11 +158,12 @@ class HistoryServer( /** * The recommended way of starting and stopping a HistoryServer is through the scripts - * start-history-server.sh and stop-history-server.sh. The path to a base log directory - * is must be specified, while the requested UI port is optional. For example: + * start-history-server.sh and stop-history-server.sh. The path to a base log directory, + * as well as any other relevant history server configuration, should be specified via + * the $SPARK_HISTORY_OPTS environment variable. For example: * - * ./sbin/spark-history-server.sh /tmp/spark-events - * ./sbin/spark-history-server.sh hdfs://1.2.3.4:9000/spark-events + * export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=/tmp/spark-events" + * ./sbin/start-history-server.sh * * This launches the HistoryServer as a Spark daemon. */ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index 5bce32a04d16d..b1270ade9f750 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -17,14 +17,13 @@ package org.apache.spark.deploy.history -import org.apache.spark.SparkConf +import org.apache.spark.{Logging, SparkConf} import org.apache.spark.util.Utils /** * Command-line parser for the master. 
*/ -private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]) { - private var logDir: String = null +private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]) extends Logging { private var propertiesFile: String = null parse(args.toList) @@ -32,7 +31,8 @@ private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String] private def parse(args: List[String]): Unit = { args match { case ("--dir" | "-d") :: value :: tail => - logDir = value + logWarning("Setting log directory through the command line is deprecated as of " + + "Spark 1.1.0. Please set this through spark.history.fs.logDirectory instead.") conf.set("spark.history.fs.logDirectory", value) System.setProperty("spark.history.fs.logDirectory", value) parse(tail) @@ -78,9 +78,10 @@ private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String] | (default 50) |FsHistoryProvider options: | - | spark.history.fs.logDirectory Directory where app logs are stored (required) - | spark.history.fs.updateInterval How often to reload log data from storage (in seconds, - | default 10) + | spark.history.fs.logDirectory Directory where app logs are stored + | (default: file:/tmp/spark-events) + | spark.history.fs.updateInterval How often to reload log data from storage + | (in seconds, default: 10) |""".stripMargin) System.exit(exitCode) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index c3ca43f8d0734..ede0a9dbefb8d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -24,7 +24,9 @@ import scala.collection.mutable.ArrayBuffer import akka.actor.ActorRef +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.ApplicationDescription +import org.apache.spark.util.Utils private[spark] class ApplicationInfo( val startTime: Long, @@ -36,8 +38,8 @@ private[spark] class ApplicationInfo( extends Serializable { @transient var state: ApplicationState.Value = _ - @transient var executors: mutable.HashMap[Int, ExecutorInfo] = _ - @transient var removedExecutors: ArrayBuffer[ExecutorInfo] = _ + @transient var executors: mutable.HashMap[Int, ExecutorDesc] = _ + @transient var removedExecutors: ArrayBuffer[ExecutorDesc] = _ @transient var coresGranted: Int = _ @transient var endTime: Long = _ @transient var appSource: ApplicationSource = _ @@ -46,19 +48,19 @@ private[spark] class ApplicationInfo( init() - private def readObject(in: java.io.ObjectInputStream): Unit = { + private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } private def init() { state = ApplicationState.WAITING - executors = new mutable.HashMap[Int, ExecutorInfo] + executors = new mutable.HashMap[Int, ExecutorDesc] coresGranted = 0 endTime = -1L appSource = new ApplicationSource(this) nextExecutorId = 0 - removedExecutors = new ArrayBuffer[ExecutorInfo] + removedExecutors = new ArrayBuffer[ExecutorDesc] } private def newExecutorId(useID: Option[Int] = None): Int = { @@ -73,14 +75,14 @@ private[spark] class ApplicationInfo( } } - def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): ExecutorInfo = { - val exec = new ExecutorInfo(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave) + def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): ExecutorDesc = { 
+ val exec = new ExecutorDesc(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave) executors(exec.id) = exec coresGranted += cores exec } - def removeExecutor(exec: ExecutorInfo) { + def removeExecutor(exec: ExecutorDesc) { if (executors.contains(exec.id)) { removedExecutors += executors(exec.id) executors -= exec.id diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala index 80b570a44af18..9d3d7938c6ccb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala @@ -19,7 +19,9 @@ package org.apache.spark.deploy.master import java.util.Date +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.DriverDescription +import org.apache.spark.util.Utils private[spark] class DriverInfo( val startTime: Long, @@ -36,7 +38,7 @@ private[spark] class DriverInfo( init() - private def readObject(in: java.io.ObjectInputStream): Unit = { + private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala new file mode 100644 index 0000000000000..5d620dfcabad5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.master + +import org.apache.spark.deploy.{ExecutorDescription, ExecutorState} + +private[spark] class ExecutorDesc( + val id: Int, + val application: ApplicationInfo, + val worker: WorkerInfo, + val cores: Int, + val memory: Int) { + + var state = ExecutorState.LAUNCHING + + /** Copy all state (non-val) variables from the given on-the-wire ExecutorDescription. 
*/ + def copyState(execDesc: ExecutorDescription) { + state = execDesc.state + } + + def fullId: String = application.id + "/" + id + + override def equals(other: Any): Boolean = { + other match { + case info: ExecutorDesc => + fullId == info.fullId && + worker.id == info.worker.id && + cores == info.cores && + memory == info.memory + case _ => false + } + } + + override def toString: String = fullId + + override def hashCode: Int = toString.hashCode() +} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala deleted file mode 100644 index d417070c51016..0000000000000 --- a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.deploy.master - -import org.apache.spark.deploy.{ExecutorDescription, ExecutorState} - -private[spark] class ExecutorInfo( - val id: Int, - val application: ApplicationInfo, - val worker: WorkerInfo, - val cores: Int, - val memory: Int) { - - var state = ExecutorState.LAUNCHING - - /** Copy all state (non-val) variables from the given on-the-wire ExecutorDescription. */ - def copyState(execDesc: ExecutorDescription) { - state = execDesc.state - } - - def fullId: String = application.id + "/" + id - - override def equals(other: Any): Boolean = { - other match { - case info: ExecutorInfo => - fullId == info.fullId && - worker.id == info.worker.id && - cores == info.cores && - memory == info.memory - case _ => false - } - } - - override def toString: String = fullId - - override def hashCode: Int = toString.hashCode() -} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala index 08a99bbe68578..36a2e2c6a6349 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala @@ -19,10 +19,13 @@ package org.apache.spark.deploy.master import java.io._ +import scala.reflect.ClassTag + import akka.serialization.Serialization import org.apache.spark.Logging + /** * Stores data in a single on-disk directory with one file per application and worker. * Files are deleted when applications and workers are removed. 
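The hunk below collapses the per-type add/remove/read methods into a generic persist/unpersist/read contract keyed by name. A simplified standalone sketch of that contract, using plain Java serialization rather than Akka's Serialization extension (the class name is made up and this is not the patch's implementation):

    import java.io._
    import scala.reflect.ClassTag

    // Stores each named object as one file in `dir`, mirroring the persist/unpersist/read
    // shape that FileSystemPersistenceEngine adopts below.
    class JavaSerializationFileStore(dir: String) {
      new File(dir).mkdirs()

      def persist(name: String, obj: AnyRef): Unit = {
        val out = new ObjectOutputStream(new FileOutputStream(new File(dir, name)))
        try out.writeObject(obj) finally out.close()
      }

      def unpersist(name: String): Unit = {
        new File(dir, name).delete()
      }

      def read[T: ClassTag](prefix: String): Seq[T] = {
        val files = Option(new File(dir).listFiles()).getOrElse(Array.empty[File])
        files.filter(_.getName.startsWith(prefix)).sortBy(_.getName).toSeq.map { f =>
          val in = new ObjectInputStream(new FileInputStream(f))
          try in.readObject().asInstanceOf[T] finally in.close()
        }
      }
    }

With this shape, the master's readPersistedData reduces to three read calls with the "app_", "driver_" and "worker_" prefixes, as the PersistenceEngine changes later in this patch show.
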
@@ -37,51 +40,24 @@ private[spark] class FileSystemPersistenceEngine( new File(dir).mkdir() - override def addApplication(app: ApplicationInfo) { - val appFile = new File(dir + File.separator + "app_" + app.id) - serializeIntoFile(appFile, app) - } - - override def removeApplication(app: ApplicationInfo) { - new File(dir + File.separator + "app_" + app.id).delete() - } - - override def addDriver(driver: DriverInfo) { - val driverFile = new File(dir + File.separator + "driver_" + driver.id) - serializeIntoFile(driverFile, driver) - } - - override def removeDriver(driver: DriverInfo) { - new File(dir + File.separator + "driver_" + driver.id).delete() - } - - override def addWorker(worker: WorkerInfo) { - val workerFile = new File(dir + File.separator + "worker_" + worker.id) - serializeIntoFile(workerFile, worker) + override def persist(name: String, obj: Object): Unit = { + serializeIntoFile(new File(dir + File.separator + name), obj) } - override def removeWorker(worker: WorkerInfo) { - new File(dir + File.separator + "worker_" + worker.id).delete() + override def unpersist(name: String): Unit = { + new File(dir + File.separator + name).delete() } - override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = { - val sortedFiles = new File(dir).listFiles().sortBy(_.getName) - val appFiles = sortedFiles.filter(_.getName.startsWith("app_")) - val apps = appFiles.map(deserializeFromFile[ApplicationInfo]) - val driverFiles = sortedFiles.filter(_.getName.startsWith("driver_")) - val drivers = driverFiles.map(deserializeFromFile[DriverInfo]) - val workerFiles = sortedFiles.filter(_.getName.startsWith("worker_")) - val workers = workerFiles.map(deserializeFromFile[WorkerInfo]) - (apps, drivers, workers) + override def read[T: ClassTag](prefix: String) = { + val files = new File(dir).listFiles().filter(_.getName.startsWith(prefix)) + files.map(deserializeFromFile[T]) } private def serializeIntoFile(file: File, value: AnyRef) { val created = file.createNewFile() if (!created) { throw new IllegalStateException("Could not create file: " + file) } - val serializer = serialization.findSerializerFor(value) val serialized = serializer.toBinary(value) - val out = new FileOutputStream(file) try { out.write(serialized) @@ -90,7 +66,7 @@ private[spark] class FileSystemPersistenceEngine( } } - def deserializeFromFile[T](file: File)(implicit m: Manifest[T]): T = { + private def deserializeFromFile[T](file: File)(implicit m: ClassTag[T]): T = { val fileData = new Array[Byte](file.length().asInstanceOf[Int]) val dis = new DataInputStream(new FileInputStream(file)) try { @@ -98,9 +74,9 @@ private[spark] class FileSystemPersistenceEngine( } finally { dis.close() } - val clazz = m.runtimeClass.asInstanceOf[Class[T]] val serializer = serialization.serializerFor(clazz) serializer.fromBinary(fileData).asInstanceOf[T] } + } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala index 4433a2ec29be6..cf77c86d760cf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala @@ -17,30 +17,27 @@ package org.apache.spark.deploy.master -import akka.actor.{Actor, ActorRef} - -import org.apache.spark.deploy.master.MasterMessages.ElectedLeader +import org.apache.spark.annotation.DeveloperApi /** - * A LeaderElectionAgent keeps track of whether the current Master is the 
leader, meaning it - * is the only Master serving requests. - * In addition to the API provided, the LeaderElectionAgent will use of the following messages - * to inform the Master of leader changes: - * [[org.apache.spark.deploy.master.MasterMessages.ElectedLeader ElectedLeader]] - * [[org.apache.spark.deploy.master.MasterMessages.RevokedLeadership RevokedLeadership]] + * :: DeveloperApi :: + * + * A LeaderElectionAgent tracks current master and is a common interface for all election Agents. */ -private[spark] trait LeaderElectionAgent extends Actor { - // TODO: LeaderElectionAgent does not necessary to be an Actor anymore, need refactoring. - val masterActor: ActorRef +@DeveloperApi +trait LeaderElectionAgent { + val masterActor: LeaderElectable + def stop() {} // to avoid noops in implementations. } -/** Single-node implementation of LeaderElectionAgent -- we're initially and always the leader. */ -private[spark] class MonarchyLeaderAgent(val masterActor: ActorRef) extends LeaderElectionAgent { - override def preStart() { - masterActor ! ElectedLeader - } +@DeveloperApi +trait LeaderElectable { + def electedLeader() + def revokedLeadership() +} - override def receive = { - case _ => - } +/** Single-node implementation of LeaderElectionAgent -- we're initially and always the leader. */ +private[spark] class MonarchyLeaderAgent(val masterActor: LeaderElectable) + extends LeaderElectionAgent { + masterActor.electedLeader() } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 3b6bb9fe128a4..5eeb9fe526248 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -17,6 +17,7 @@ package org.apache.spark.deploy.master +import java.io.FileNotFoundException import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Date @@ -30,7 +31,9 @@ import scala.util.Random import akka.actor._ import akka.pattern.ask import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} +import akka.serialization.Serialization import akka.serialization.SerializationExtension +import org.apache.hadoop.fs.Path import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, @@ -50,18 +53,18 @@ private[spark] class Master( port: Int, webUiPort: Int, val securityMgr: SecurityManager) - extends Actor with ActorLogReceive with Logging { + extends Actor with ActorLogReceive with Logging with LeaderElectable { import context.dispatcher // to use Akka's scheduler.schedule() val conf = new SparkConf + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000 val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200) val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200) val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15) - val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "") val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE") val workers = new HashSet[WorkerInfo] @@ -103,7 +106,7 @@ private[spark] class Master( var persistenceEngine: PersistenceEngine = _ - var leaderElectionAgent: ActorRef = _ + var leaderElectionAgent: LeaderElectionAgent = _ private var recoveryCompletionTask: Cancellable = 
_ @@ -120,6 +123,7 @@ private[spark] class Master( override def preStart() { logInfo("Starting Spark master at " + masterUrl) + logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) webUi.bind() @@ -129,24 +133,32 @@ private[spark] class Master( masterMetricsSystem.registerSource(masterSource) masterMetricsSystem.start() applicationMetricsSystem.start() + // Attach the master and app metrics servlet handler to the web ui after the metrics systems are + // started. + masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler) + applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler) - persistenceEngine = RECOVERY_MODE match { + val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match { case "ZOOKEEPER" => logInfo("Persisting recovery state to ZooKeeper") - new ZooKeeperPersistenceEngine(SerializationExtension(context.system), conf) + val zkFactory = + new ZooKeeperRecoveryModeFactory(conf, SerializationExtension(context.system)) + (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this)) case "FILESYSTEM" => - logInfo("Persisting recovery state to directory: " + RECOVERY_DIR) - new FileSystemPersistenceEngine(RECOVERY_DIR, SerializationExtension(context.system)) + val fsFactory = + new FileSystemRecoveryModeFactory(conf, SerializationExtension(context.system)) + (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this)) + case "CUSTOM" => + val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory")) + val factory = clazz.getConstructor(conf.getClass, Serialization.getClass) + .newInstance(conf, SerializationExtension(context.system)) + .asInstanceOf[StandaloneRecoveryModeFactory] + (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this)) case _ => - new BlackHolePersistenceEngine() + (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this)) } - - leaderElectionAgent = RECOVERY_MODE match { - case "ZOOKEEPER" => - context.actorOf(Props(classOf[ZooKeeperLeaderElectionAgent], self, masterUrl, conf)) - case _ => - context.actorOf(Props(classOf[MonarchyLeaderAgent], self)) - } + persistenceEngine = persistenceEngine_ + leaderElectionAgent = leaderElectionAgent_ } override def preRestart(reason: Throwable, message: Option[Any]) { @@ -165,7 +177,15 @@ private[spark] class Master( masterMetricsSystem.stop() applicationMetricsSystem.stop() persistenceEngine.close() - context.stop(leaderElectionAgent) + leaderElectionAgent.stop() + } + + override def electedLeader() { + self ! ElectedLeader + } + + override def revokedLeadership() { + self ! RevokedLeadership } override def receiveWithLogging = { @@ -498,7 +518,7 @@ private[spark] class Master( val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE)) val numWorkersAlive = shuffledAliveWorkers.size var curPos = 0 - + for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers // We assign workers to each waiting driver in a round-robin fashion. 
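Returning briefly to the recovery-mode wiring earlier in this file: the new CUSTOM branch lets a deployment plug in its own persistence and leader election through the DeveloperApi traits this patch introduces. A hedged sketch of such a plug-in, with made-up names and an in-memory store that is only useful for testing:

    // Hypothetical factory, selected with:
    //   spark.deploy.recoveryMode=CUSTOM
    //   spark.deploy.recoveryMode.factory=org.example.InMemoryRecoveryModeFactory
    import scala.collection.mutable
    import scala.reflect.ClassTag

    import akka.serialization.Serialization

    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.master._

    class InMemoryRecoveryModeFactory(conf: SparkConf, serializer: Serialization)
      extends StandaloneRecoveryModeFactory(conf, serializer) {

      // Keeps recovery state in memory only, so nothing survives a master restart.
      override def createPersistenceEngine(): PersistenceEngine = new PersistenceEngine {
        private val store = mutable.HashMap.empty[String, Object]
        override def persist(name: String, obj: Object): Unit = { store(name) = obj }
        override def unpersist(name: String): Unit = { store -= name }
        override def read[T: ClassTag](prefix: String): Seq[T] =
          store.collect { case (k, v) if k.startsWith(prefix) => v.asInstanceOf[T] }.toSeq
      }

      // Single master: elect it immediately, mirroring MonarchyLeaderAgent.
      override def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent =
        new LeaderElectionAgent {
          override val masterActor: LeaderElectable = master
          master.electedLeader()
        }
    }
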
For each driver, we // start from the last worker that was assigned a driver, and continue onwards until we have @@ -561,7 +581,7 @@ private[spark] class Master( } } - def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo) { + def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) worker.actor ! LaunchExecutor(masterUrl, @@ -685,6 +705,11 @@ private[spark] class Master( } persistenceEngine.removeApplication(app) schedule() + + // Tell all workers that the application has finished, so they can clean up any app state. + workers.foreach { w => + w.actor ! ApplicationFinished(app.id) + } } } @@ -695,41 +720,51 @@ private[spark] class Master( def rebuildSparkUI(app: ApplicationInfo): Boolean = { val appName = app.desc.name val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found" - val eventLogDir = app.desc.eventLogDir.getOrElse { - // Event logging is not enabled for this application - app.desc.appUiUrl = notFoundBasePath - return false - } - - val appEventLogDir = EventLoggingListener.getLogDirPath(eventLogDir, app.id) - val fileSystem = Utils.getHadoopFileSystem(appEventLogDir, - SparkHadoopUtil.get.newConfiguration(conf)) - val eventLogInfo = EventLoggingListener.parseLoggingInfo(appEventLogDir, fileSystem) - val eventLogPaths = eventLogInfo.logPaths - val compressionCodec = eventLogInfo.compressionCodec - - if (eventLogPaths.isEmpty) { - // Event logging is enabled for this application, but no event logs are found - val title = s"Application history not found (${app.id})" - var msg = s"No event logs found for application $appName in $appEventLogDir." - logWarning(msg) - msg += " Did you specify the correct logging directory?" - msg = URLEncoder.encode(msg, "UTF-8") - app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" - return false - } - try { - val replayBus = new ReplayListenerBus(eventLogPaths, fileSystem, compressionCodec) - val ui = new SparkUI(new SparkConf, replayBus, appName + " (completed)", - HistoryServer.UI_PATH_PREFIX + s"/${app.id}") - replayBus.replay() + val eventLogFile = app.desc.eventLogDir + .map { dir => EventLoggingListener.getLogPath(dir, app.id) } + .getOrElse { + // Event logging is not enabled for this application + app.desc.appUiUrl = notFoundBasePath + return false + } + + val fs = Utils.getHadoopFileSystem(eventLogFile, hadoopConf) + + if (fs.exists(new Path(eventLogFile + EventLoggingListener.IN_PROGRESS))) { + // Event logging is enabled for this application, but the application is still in progress + val title = s"Application history not found (${app.id})" + var msg = s"Application $appName is still in progress." 
+ logWarning(msg) + msg = URLEncoder.encode(msg, "UTF-8") + app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" + return false + } + + val (logInput, sparkVersion) = EventLoggingListener.openEventLog(new Path(eventLogFile), fs) + val replayBus = new ReplayListenerBus() + val ui = SparkUI.createHistoryUI(new SparkConf, replayBus, new SecurityManager(conf), + appName + " (completed)", HistoryServer.UI_PATH_PREFIX + s"/${app.id}") + try { + replayBus.replay(logInput, sparkVersion) + } finally { + logInput.close() + } appIdToUI(app.id) = ui webUi.attachSparkUI(ui) // Application UI is successfully rebuilt, so link the Master UI to it - app.desc.appUiUrl = ui.getBasePath + app.desc.appUiUrl = ui.basePath true } catch { + case fnf: FileNotFoundException => + // Event logging is enabled for this application, but no event logs are found + val title = s"Application history not found (${app.id})" + var msg = s"No event logs found for application $appName in ${app.desc.eventLogDir}." + logWarning(msg) + msg += " Did you specify the correct logging directory?" + msg = URLEncoder.encode(msg, "UTF-8") + app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" + false case e: Exception => // Relay exception message to application UI page val title = s"Application history load error (${app.id})" @@ -811,7 +846,6 @@ private[spark] class Master( private[spark] object Master extends Logging { val systemName = "sparkMaster" private val actorName = "Master" - val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r def main(argStrings: Array[String]) { SignalLogger.register(log) @@ -821,14 +855,24 @@ private[spark] object Master extends Logging { actorSystem.awaitTermination() } - /** Returns an `akka.tcp://...` URL for the Master actor given a sparkUrl `spark://host:ip`. */ - def toAkkaUrl(sparkUrl: String): String = { - sparkUrl match { - case sparkUrlRegex(host, port) => - "akka.tcp://%s@%s:%s/user/%s".format(systemName, host, port, actorName) - case _ => - throw new SparkException("Invalid master URL: " + sparkUrl) - } + /** + * Returns an `akka.tcp://...` URL for the Master actor given a sparkUrl `spark://host:port`. + * + * @throws SparkException if the url is invalid + */ + def toAkkaUrl(sparkUrl: String, protocol: String): String = { + val (host, port) = Utils.extractHostPortFromSparkUrl(sparkUrl) + AkkaUtils.address(protocol, systemName, host, port, actorName) + } + + /** + * Returns an akka `Address` for the Master actor given a sparkUrl `spark://host:port`. + * + * @throws SparkException if the url is invalid + */ + def toAkkaAddress(sparkUrl: String, protocol: String): Address = { + val (host, port) = Utils.extractHostPortFromSparkUrl(sparkUrl) + Address(protocol, systemName, host, port) } def startSystemAndActor( diff --git a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala index e3640ea4f7e64..2e0e1e7036ac8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala @@ -17,6 +17,10 @@ package org.apache.spark.deploy.master +import org.apache.spark.annotation.DeveloperApi + +import scala.reflect.ClassTag + /** * Allows Master to persist any state that is necessary in order to recover from a failure. 
* The following semantics are required: @@ -25,36 +29,70 @@ package org.apache.spark.deploy.master * Given these two requirements, we will have all apps and workers persisted, but * we might not have yet deleted apps or workers that finished (so their liveness must be verified * during recovery). + * + * The implementation of this trait defines how name-object pairs are stored or retrieved. */ -private[spark] trait PersistenceEngine { - def addApplication(app: ApplicationInfo) +@DeveloperApi +trait PersistenceEngine { - def removeApplication(app: ApplicationInfo) + /** + * Defines how the object is serialized and persisted. Implementation will + * depend on the store used. + */ + def persist(name: String, obj: Object) - def addWorker(worker: WorkerInfo) + /** + * Defines how the object referred by its name is removed from the store. + */ + def unpersist(name: String) - def removeWorker(worker: WorkerInfo) + /** + * Gives all objects, matching a prefix. This defines how objects are + * read/deserialized back. + */ + def read[T: ClassTag](prefix: String): Seq[T] - def addDriver(driver: DriverInfo) + final def addApplication(app: ApplicationInfo): Unit = { + persist("app_" + app.id, app) + } - def removeDriver(driver: DriverInfo) + final def removeApplication(app: ApplicationInfo): Unit = { + unpersist("app_" + app.id) + } + + final def addWorker(worker: WorkerInfo): Unit = { + persist("worker_" + worker.id, worker) + } + + final def removeWorker(worker: WorkerInfo): Unit = { + unpersist("worker_" + worker.id) + } + + final def addDriver(driver: DriverInfo): Unit = { + persist("driver_" + driver.id, driver) + } + + final def removeDriver(driver: DriverInfo): Unit = { + unpersist("driver_" + driver.id) + } /** * Returns the persisted data sorted by their respective ids (which implies that they're * sorted by time of creation). */ - def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) + final def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = { + (read[ApplicationInfo]("app_"), read[DriverInfo]("driver_"), read[WorkerInfo]("worker_")) + } def close() {} } private[spark] class BlackHolePersistenceEngine extends PersistenceEngine { - override def addApplication(app: ApplicationInfo) {} - override def removeApplication(app: ApplicationInfo) {} - override def addWorker(worker: WorkerInfo) {} - override def removeWorker(worker: WorkerInfo) {} - override def addDriver(driver: DriverInfo) {} - override def removeDriver(driver: DriverInfo) {} - - override def readPersistedData() = (Nil, Nil, Nil) + + override def persist(name: String, obj: Object): Unit = {} + + override def unpersist(name: String): Unit = {} + + override def read[T: ClassTag](name: String): Seq[T] = Nil + } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala new file mode 100644 index 0000000000000..1096eb0368357 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.master + +import akka.serialization.Serialization + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.annotation.DeveloperApi + +/** + * ::DeveloperApi:: + * + * Implementation of this class can be plugged in as recovery mode alternative for Spark's + * Standalone mode. + * + */ +@DeveloperApi +abstract class StandaloneRecoveryModeFactory(conf: SparkConf, serializer: Serialization) { + + /** + * PersistenceEngine defines how the persistent data(Information about worker, driver etc..) + * is handled for recovery. + * + */ + def createPersistenceEngine(): PersistenceEngine + + /** + * Create an instance of LeaderAgent that decides who gets elected as master. + */ + def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent +} + +/** + * LeaderAgent in this case is a no-op. Since leader is forever leader as the actual + * recovery is made by restoring from filesystem. + */ +private[spark] class FileSystemRecoveryModeFactory(conf: SparkConf, serializer: Serialization) + extends StandaloneRecoveryModeFactory(conf, serializer) with Logging { + val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "") + + def createPersistenceEngine() = { + logInfo("Persisting recovery state to directory: " + RECOVERY_DIR) + new FileSystemPersistenceEngine(RECOVERY_DIR, serializer) + } + + def createLeaderElectionAgent(master: LeaderElectable) = new MonarchyLeaderAgent(master) +} + +private[spark] class ZooKeeperRecoveryModeFactory(conf: SparkConf, serializer: Serialization) + extends StandaloneRecoveryModeFactory(conf, serializer) { + def createPersistenceEngine() = new ZooKeeperPersistenceEngine(conf, serializer) + + def createLeaderElectionAgent(master: LeaderElectable) = + new ZooKeeperLeaderElectionAgent(master, conf) +} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index c5fa9cf7d7c2d..e94aae93e4495 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -21,6 +21,7 @@ import scala.collection.mutable import akka.actor.ActorRef +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class WorkerInfo( @@ -37,7 +38,7 @@ private[spark] class WorkerInfo( Utils.checkHost(host, "Expected hostname") assert (port > 0) - @transient var executors: mutable.HashMap[String, ExecutorInfo] = _ // executorId => info + @transient var executors: mutable.HashMap[String, ExecutorDesc] = _ // executorId => info @transient var drivers: mutable.HashMap[String, DriverInfo] = _ // driverId => info @transient var state: WorkerState.Value = _ @transient var coresUsed: Int = _ @@ -50,7 +51,7 @@ private[spark] class WorkerInfo( def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed - private def readObject(in: java.io.ObjectInputStream) : Unit = { + private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } @@ 
-69,13 +70,13 @@ private[spark] class WorkerInfo( host + ":" + port } - def addExecutor(exec: ExecutorInfo) { + def addExecutor(exec: ExecutorDesc) { executors(exec.fullId) = exec coresUsed += exec.cores memoryUsed += exec.memory } - def removeExecutor(exec: ExecutorInfo) { + def removeExecutor(exec: ExecutorDesc) { if (executors.contains(exec.fullId)) { executors -= exec.fullId coresUsed -= exec.cores diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala index 285f9b014e291..8eaa0ad948519 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala @@ -24,9 +24,8 @@ import org.apache.spark.deploy.master.MasterMessages._ import org.apache.curator.framework.CuratorFramework import org.apache.curator.framework.recipes.leader.{LeaderLatchListener, LeaderLatch} -private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, - masterUrl: String, conf: SparkConf) - extends LeaderElectionAgent with LeaderLatchListener with Logging { +private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectable, + conf: SparkConf) extends LeaderLatchListener with LeaderElectionAgent with Logging { val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/leader_election" @@ -34,30 +33,21 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, private var leaderLatch: LeaderLatch = _ private var status = LeadershipStatus.NOT_LEADER - override def preStart() { + start() + def start() { logInfo("Starting ZooKeeper LeaderElection agent") zk = SparkCuratorUtil.newClient(conf) leaderLatch = new LeaderLatch(zk, WORKING_DIR) leaderLatch.addListener(this) - leaderLatch.start() } - override def preRestart(reason: scala.Throwable, message: scala.Option[scala.Any]) { - logError("LeaderElectionAgent failed...", reason) - super.preRestart(reason, message) - } - - override def postStop() { + override def stop() { leaderLatch.close() zk.close() } - override def receive = { - case _ => - } - override def isLeader() { synchronized { // could have lost leadership by now. @@ -85,10 +75,10 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, def updateLeadershipStatus(isLeader: Boolean) { if (isLeader && status == LeadershipStatus.NOT_LEADER) { status = LeadershipStatus.LEADER - masterActor ! ElectedLeader + masterActor.electedLeader() } else if (!isLeader && status == LeadershipStatus.LEADER) { status = LeadershipStatus.NOT_LEADER - masterActor ! 
RevokedLeadership + masterActor.revokedLeadership() } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala index 834dfedee52ce..e11ac031fb9c6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala @@ -17,15 +17,18 @@ package org.apache.spark.deploy.master +import akka.serialization.Serialization + import scala.collection.JavaConversions._ +import scala.reflect.ClassTag -import akka.serialization.Serialization import org.apache.curator.framework.CuratorFramework import org.apache.zookeeper.CreateMode import org.apache.spark.{Logging, SparkConf} -class ZooKeeperPersistenceEngine(serialization: Serialization, conf: SparkConf) + +private[spark] class ZooKeeperPersistenceEngine(conf: SparkConf, val serialization: Serialization) extends PersistenceEngine with Logging { @@ -34,52 +37,31 @@ class ZooKeeperPersistenceEngine(serialization: Serialization, conf: SparkConf) SparkCuratorUtil.mkdir(zk, WORKING_DIR) - override def addApplication(app: ApplicationInfo) { - serializeIntoFile(WORKING_DIR + "/app_" + app.id, app) - } - override def removeApplication(app: ApplicationInfo) { - zk.delete().forPath(WORKING_DIR + "/app_" + app.id) + override def persist(name: String, obj: Object): Unit = { + serializeIntoFile(WORKING_DIR + "/" + name, obj) } - override def addDriver(driver: DriverInfo) { - serializeIntoFile(WORKING_DIR + "/driver_" + driver.id, driver) + override def unpersist(name: String): Unit = { + zk.delete().forPath(WORKING_DIR + "/" + name) } - override def removeDriver(driver: DriverInfo) { - zk.delete().forPath(WORKING_DIR + "/driver_" + driver.id) - } - - override def addWorker(worker: WorkerInfo) { - serializeIntoFile(WORKING_DIR + "/worker_" + worker.id, worker) - } - - override def removeWorker(worker: WorkerInfo) { - zk.delete().forPath(WORKING_DIR + "/worker_" + worker.id) + override def read[T: ClassTag](prefix: String) = { + val file = zk.getChildren.forPath(WORKING_DIR).filter(_.startsWith(prefix)) + file.map(deserializeFromFile[T]).flatten } override def close() { zk.close() } - override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = { - val sortedFiles = zk.getChildren().forPath(WORKING_DIR).toList.sorted - val appFiles = sortedFiles.filter(_.startsWith("app_")) - val apps = appFiles.map(deserializeFromFile[ApplicationInfo]).flatten - val driverFiles = sortedFiles.filter(_.startsWith("driver_")) - val drivers = driverFiles.map(deserializeFromFile[DriverInfo]).flatten - val workerFiles = sortedFiles.filter(_.startsWith("worker_")) - val workers = workerFiles.map(deserializeFromFile[WorkerInfo]).flatten - (apps, drivers, workers) - } - private def serializeIntoFile(path: String, value: AnyRef) { val serializer = serialization.findSerializerFor(value) val serialized = serializer.toBinary(value) zk.create().withMode(CreateMode.PERSISTENT).forPath(path, serialized) } - def deserializeFromFile[T](filename: String)(implicit m: Manifest[T]): Option[T] = { + def deserializeFromFile[T](filename: String)(implicit m: ClassTag[T]): Option[T] = { val fileData = zk.getData().forPath(WORKING_DIR + "/" + filename) val clazz = m.runtimeClass.asInstanceOf[Class[T]] val serializer = serialization.serializerFor(clazz) diff --git 
a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 4588c130ef439..3aae2b95d7396 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -27,7 +27,7 @@ import org.json4s.JValue import org.apache.spark.deploy.{ExecutorState, JsonProtocol} import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} -import org.apache.spark.deploy.master.ExecutorInfo +import org.apache.spark.deploy.master.ExecutorDesc import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils @@ -109,7 +109,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app UIUtils.basicSparkPage(content, "Application: " + app.desc.name) } - private def executorRow(executor: ExecutorInfo): Seq[Node] = { + private def executorRow(executor: ExecutorDesc): Seq[Node] = { {executor.id} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index d86ec1e03e45c..73400c5affb5d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -41,8 +41,6 @@ class MasterWebUI(val master: Master, requestedPort: Int) attachPage(new HistoryNotFoundPage(this)) attachPage(new MasterPage(this)) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) - master.masterMetricsSystem.getServletHandlers.foreach(attachHandler) - master.applicationMetricsSystem.getServletHandlers.foreach(attachHandler) } /** Attach a reconstructed UI to this Master UI. Only valid after bind(). */ diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala index 2e9be2a180c68..28e9662db5da9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -20,6 +20,8 @@ package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import java.lang.System._ +import scala.collection.Map + import org.apache.spark.Logging import org.apache.spark.deploy.Command import org.apache.spark.util.Utils @@ -29,7 +31,29 @@ import org.apache.spark.util.Utils */ private[spark] object CommandUtils extends Logging { - def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = { + + /** + * Build a ProcessBuilder based on the given parameters. + * The `env` argument is exposed for testing. 
+ */ + def buildProcessBuilder( + command: Command, + memory: Int, + sparkHome: String, + substituteArguments: String => String, + classPaths: Seq[String] = Seq[String](), + env: Map[String, String] = sys.env): ProcessBuilder = { + val localCommand = buildLocalCommand(command, substituteArguments, classPaths, env) + val commandSeq = buildCommandSeq(localCommand, memory, sparkHome) + val builder = new ProcessBuilder(commandSeq: _*) + val environment = builder.environment() + for ((key, value) <- localCommand.environment) { + environment.put(key, value) + } + builder + } + + private def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = { val runner = sys.env.get("JAVA_HOME").map(_ + "/bin/java").getOrElse("java") // SPARK-698: do not call the run.cmd script, as process.destroy() @@ -38,11 +62,41 @@ object CommandUtils extends Logging { command.arguments } + /** + * Build a command based on the given one, taking into account the local environment + * of where this command is expected to run, substitute any placeholders, and append + * any extra class paths. + */ + private def buildLocalCommand( + command: Command, + substituteArguments: String => String, + classPath: Seq[String] = Seq[String](), + env: Map[String, String]): Command = { + val libraryPathName = Utils.libraryPathEnvName + val libraryPathEntries = command.libraryPathEntries + val cmdLibraryPath = command.environment.get(libraryPathName) + + val newEnvironment = if (libraryPathEntries.nonEmpty && libraryPathName.nonEmpty) { + val libraryPaths = libraryPathEntries ++ cmdLibraryPath ++ env.get(libraryPathName) + command.environment + ((libraryPathName, libraryPaths.mkString(File.pathSeparator))) + } else { + command.environment + } + + Command( + command.mainClass, + command.arguments.map(substituteArguments), + newEnvironment, + command.classPathEntries ++ classPath, + Seq[String](), // library path already captured in environment variable + command.javaOpts) + } + /** * Attention: this must always be aligned with the environment variables in the run scripts and * the way the JAVA_OPTS are assembled there. 
*/ - def buildJavaOpts(command: Command, memory: Int, sparkHome: String): Seq[String] = { + private def buildJavaOpts(command: Command, memory: Int, sparkHome: String): Seq[String] = { val memoryOpts = Seq(s"-Xms${memory}M", s"-Xmx${memory}M") // Exists for backwards compatibility with older Spark versions @@ -53,14 +107,6 @@ object CommandUtils extends Logging { logWarning("Set SPARK_LOCAL_DIRS for node-specific storage locations.") } - val libraryOpts = - if (command.libraryPathEntries.size > 0) { - val joined = command.libraryPathEntries.mkString(File.pathSeparator) - Seq(s"-Djava.library.path=$joined") - } else { - Seq() - } - // Figure out our classpath with the external compute-classpath script val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh" val classPath = Utils.executeAndGetOutput( @@ -71,7 +117,7 @@ object CommandUtils extends Logging { val javaVersion = System.getProperty("java.version") val permGenOpt = if (!javaVersion.startsWith("1.8")) Some("-XX:MaxPermSize=128m") else None Seq("-cp", userClassPath.filterNot(_.isEmpty).mkString(File.pathSeparator)) ++ - permGenOpt ++ libraryOpts ++ workerLocalOpts ++ command.javaOpts ++ memoryOpts + permGenOpt ++ workerLocalOpts ++ command.javaOpts ++ memoryOpts } /** Spawn a thread that will redirect a given stream to a file */ diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala index 9f9911762505a..28cab36c7b9e2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConversions._ import scala.collection.Map import akka.actor.ActorRef -import com.google.common.base.Charsets +import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileUtil, Path} @@ -76,17 +76,9 @@ private[spark] class DriverRunner( // Make sure user application jar is on the classpath // TODO: If we add ability to submit multiple jars they should also be added here - val classPath = driverDesc.command.classPathEntries ++ Seq(s"$localJarFilename") - val newCommand = Command( - driverDesc.command.mainClass, - driverDesc.command.arguments.map(substituteVariables), - driverDesc.command.environment, - classPath, - driverDesc.command.libraryPathEntries, - driverDesc.command.javaOpts) - val command = CommandUtils.buildCommandSeq(newCommand, driverDesc.mem, - sparkHome.getAbsolutePath) - launchDriver(command, driverDesc.command.environment, driverDir, driverDesc.supervise) + val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem, + sparkHome.getAbsolutePath, substituteVariables, Seq(localJarFilename)) + launchDriver(builder, driverDir, driverDesc.supervise) } catch { case e: Exception => finalException = Some(e) @@ -165,11 +157,8 @@ private[spark] class DriverRunner( localJarFilename } - private def launchDriver(command: Seq[String], envVars: Map[String, String], baseDir: File, - supervise: Boolean) { - val builder = new ProcessBuilder(command: _*).directory(baseDir) - envVars.map{ case(k,v) => builder.environment().put(k, v) } - + private def launchDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean) { + builder.directory(baseDir) def initialize(process: Process) = { // Redirect stdout and stderr to files val stdout = new File(baseDir, "stdout") @@ -177,8 +166,8 
@@ private[spark] class DriverRunner( val stderr = new File(baseDir, "stderr") val header = "Launch Command: %s\n%s\n\n".format( - command.mkString("\"", "\" \"", "\""), "=" * 40) - Files.append(header, stderr, Charsets.UTF_8) + builder.command.mkString("\"", "\" \"", "\""), "=" * 40) + Files.append(header, stderr, UTF_8) CommandUtils.redirectStream(process.getErrorStream, stderr) } runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 71d7385b08eb9..bc9f78b9e5c77 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -19,12 +19,14 @@ package org.apache.spark.deploy.worker import java.io._ +import scala.collection.JavaConversions._ + import akka.actor.ActorRef -import com.google.common.base.Charsets +import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files import org.apache.spark.{SparkConf, Logging} -import org.apache.spark.deploy.{ApplicationDescription, Command, ExecutorState} +import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged import org.apache.spark.util.logging.FileAppender @@ -45,6 +47,7 @@ private[spark] class ExecutorRunner( val executorDir: File, val workerUrl: String, val conf: SparkConf, + val appLocalDirs: Seq[String], var state: ExecutorState.Value) extends Logging { @@ -75,7 +78,7 @@ private[spark] class ExecutorRunner( /** * Kill executor process, wait for exit and notify worker to update resource status. * - * @param message the exception message which caused the executor's death + * @param message the exception message which caused the executor's death */ private def killProcess(message: Option[String]) { var exitCode: Option[Int] = None @@ -115,33 +118,22 @@ private[spark] class ExecutorRunner( case other => other } - def getCommandSeq = { - val command = Command( - appDesc.command.mainClass, - appDesc.command.arguments.map(substituteVariables), - appDesc.command.environment, - appDesc.command.classPathEntries, - appDesc.command.libraryPathEntries, - appDesc.command.javaOpts) - CommandUtils.buildCommandSeq(command, memory, sparkHome.getAbsolutePath) - } - /** * Download and run the executor described in our ApplicationDescription */ def fetchAndRunExecutor() { try { // Launch the process - val command = getCommandSeq + val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory, + sparkHome.getAbsolutePath, substituteVariables) + val command = builder.command() logInfo("Launch command: " + command.mkString("\"", "\" \"", "\"")) - val builder = new ProcessBuilder(command: _*).directory(executorDir) - val env = builder.environment() - for ((key, value) <- appDesc.command.environment) { - env.put(key, value) - } + + builder.directory(executorDir) + builder.environment.put("SPARK_LOCAL_DIRS", appLocalDirs.mkString(",")) // In case we are running this from within the Spark Shell, avoid creating a "scala" // parent process for the executor command - env.put("SPARK_LAUNCH_WITH_SCALA", "0") + builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0") process = builder.start() val header = "Spark Executor Command: %s\n%s\n\n".format( command.mkString("\"", "\" \"", "\""), "=" * 40) @@ -151,11 +143,9 @@ private[spark] class ExecutorRunner( stdoutAppender = 
FileAppender(process.getInputStream, stdout, conf) val stderr = new File(executorDir, "stderr") - Files.write(header, stderr, Charsets.UTF_8) + Files.write(header, stderr, UTF_8) stderrAppender = FileAppender(process.getErrorStream, stderr, conf) - state = ExecutorState.RUNNING - worker ! ExecutorStateChanged(appId, execId, state, None, None) // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown) // or with nonzero exit code val exitCode = process.waitFor() diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala new file mode 100644 index 0000000000000..b9798963bab0a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/StandaloneWorkerShuffleService.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.worker + +import org.apache.spark.{Logging, SparkConf, SecurityManager} +import org.apache.spark.network.TransportContext +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.sasl.SaslRpcHandler +import org.apache.spark.network.server.TransportServer +import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler + +/** + * Provides a server from which Executors can read shuffle files (rather than reading directly from + * each other), to provide uninterrupted access to the files in the face of executors being turned + * off or killed. + * + * Optionally requires SASL authentication in order to read. See [[SecurityManager]]. + */ +private[worker] +class StandaloneWorkerShuffleService(sparkConf: SparkConf, securityManager: SecurityManager) + extends Logging { + + private val enabled = sparkConf.getBoolean("spark.shuffle.service.enabled", false) + private val port = sparkConf.getInt("spark.shuffle.service.port", 7337) + private val useSasl: Boolean = securityManager.isAuthenticationEnabled() + + private val transportConf = SparkTransportConf.fromSparkConf(sparkConf, numUsableCores = 0) + private val blockHandler = new ExternalShuffleBlockHandler(transportConf) + private val transportContext: TransportContext = { + val handler = if (useSasl) new SaslRpcHandler(blockHandler, securityManager) else blockHandler + new TransportContext(transportConf, handler) + } + + private var server: TransportServer = _ + + /** Starts the external shuffle service if the user has configured us to. 
*/ + def startIfEnabled() { + if (enabled) { + require(server == null, "Shuffle server already started") + logInfo(s"Starting shuffle service on port $port with useSasl = $useSasl") + server = transportContext.createServer(port) + } + } + + def stop() { + if (enabled && server != null) { + server.close() + server = null + } + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index c4a8ec2e5e7b0..b20f5c0c82895 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -21,10 +21,9 @@ import java.io.File import java.io.IOException import java.text.SimpleDateFormat import java.util.{UUID, Date} -import java.util.concurrent.TimeUnit import scala.collection.JavaConversions._ -import scala.collection.mutable.HashMap +import scala.collection.mutable.{HashMap, HashSet} import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random @@ -32,8 +31,8 @@ import scala.util.Random import akka.actor._ import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} -import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException} -import org.apache.spark.deploy.{ExecutorDescription, ExecutorState} +import org.apache.spark.{Logging, SecurityManager, SparkConf} +import org.apache.spark.deploy.{Command, ExecutorDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.{DriverState, Master} import org.apache.spark.deploy.worker.ui.WorkerWebUI @@ -41,7 +40,7 @@ import org.apache.spark.metrics.MetricsSystem import org.apache.spark.util.{ActorLogReceive, AkkaUtils, SignalLogger, Utils} /** - * @param masterUrls Each url should look like spark://host:port. + * @param masterAkkaUrls Each url should be a valid akka url. */ private[spark] class Worker( host: String, @@ -49,7 +48,7 @@ private[spark] class Worker( webUiPort: Int, cores: Int, memory: Int, - masterUrls: Array[String], + masterAkkaUrls: Array[String], actorSystemName: String, actorName: String, workDirPath: String = null, @@ -94,7 +93,12 @@ private[spark] class Worker( var masterAddress: Address = null var activeMasterUrl: String = "" var activeMasterWebUiUrl : String = "" - val akkaUrl = "akka.tcp://%s@%s:%s/user/%s".format(actorSystemName, host, port, actorName) + val akkaUrl = AkkaUtils.address( + AkkaUtils.protocol(context.system), + actorSystemName, + host, + port, + actorName) @volatile var registered = false @volatile var connected = false val workerId = generateWorkerId() @@ -110,6 +114,11 @@ private[spark] class Worker( val finishedExecutors = new HashMap[String, ExecutorRunner] val drivers = new HashMap[String, DriverRunner] val finishedDrivers = new HashMap[String, DriverRunner] + val appDirectories = new HashMap[String, Seq[String]] + val finishedApps = new HashSet[String] + + // The shuffle service is not actually started unless configured. 
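A minimal sketch of how this optional service is driven purely by configuration, assuming only the two keys and defaults visible in this patch:

    import org.apache.spark.SparkConf

    // Both keys default to the values read by StandaloneWorkerShuffleService above:
    // the service stays off unless explicitly enabled, and 7337 is the default port.
    val conf = new SparkConf()
      .set("spark.shuffle.service.enabled", "true")
      .set("spark.shuffle.service.port", "7337")
    // With this conf, the worker's startup path (shuffleService.startIfEnabled() below)
    // starts the server; with the default value of false it is a no-op.
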
+ val shuffleService = new StandaloneWorkerShuffleService(conf, securityMgr) val publicAddress = { val envVar = System.getenv("SPARK_PUBLIC_DNS") @@ -151,52 +160,89 @@ private[spark] class Worker( assert(!registered) logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( host, port, cores, Utils.megabytesToString(memory))) + logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") logInfo("Spark home: " + sparkHome) createWorkDir() context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) + shuffleService.startIfEnabled() webUi = new WorkerWebUI(this, workDir, webUiPort) webUi.bind() registerWithMaster() metricsSystem.registerSource(workerSource) metricsSystem.start() + // Attach the worker metrics servlet handler to the web ui after the metrics system is started. + metricsSystem.getServletHandlers.foreach(webUi.attachHandler) } def changeMaster(url: String, uiUrl: String) { + // activeMasterUrl it's a valid Spark url since we receive it from master. activeMasterUrl = url activeMasterWebUiUrl = uiUrl - master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl)) - masterAddress = activeMasterUrl match { - case Master.sparkUrlRegex(_host, _port) => - Address("akka.tcp", Master.systemName, _host, _port.toInt) - case x => - throw new SparkException("Invalid spark URL: " + x) - } + master = context.actorSelection( + Master.toAkkaUrl(activeMasterUrl, AkkaUtils.protocol(context.system))) + masterAddress = Master.toAkkaAddress(activeMasterUrl, AkkaUtils.protocol(context.system)) connected = true + // Cancel any outstanding re-registration attempts because we found a new master + registrationRetryTimer.foreach(_.cancel()) + registrationRetryTimer = None } private def tryRegisterAllMasters() { - for (masterUrl <- masterUrls) { - logInfo("Connecting to master " + masterUrl + "...") - val actor = context.actorSelection(Master.toAkkaUrl(masterUrl)) + for (masterAkkaUrl <- masterAkkaUrls) { + logInfo("Connecting to master " + masterAkkaUrl + "...") + val actor = context.actorSelection(masterAkkaUrl) actor ! RegisterWorker(workerId, host, port, cores, memory, webUi.boundPort, publicAddress) } } - private def retryConnectToMaster() { + /** + * Re-register with the master because a network failure or a master failure has occurred. + * If the re-registration attempt threshold is exceeded, the worker exits with error. + * Note that for thread-safety this should only be called from the actor. + */ + private def reregisterWithMaster(): Unit = { Utils.tryOrExit { connectionAttemptCount += 1 - logInfo(s"Attempting to connect to master (attempt # $connectionAttemptCount") if (registered) { registrationRetryTimer.foreach(_.cancel()) registrationRetryTimer = None } else if (connectionAttemptCount <= TOTAL_REGISTRATION_RETRIES) { - tryRegisterAllMasters() + logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)") + /** + * Re-register with the active master this worker has been communicating with. If there + * is none, then it means this worker is still bootstrapping and hasn't established a + * connection with a master yet, in which case we should re-register with all masters. + * + * It is important to re-register only with the active master during failures. 
Otherwise, + * if the worker unconditionally attempts to re-register with all masters, the following + * race condition may arise and cause a "duplicate worker" error detailed in SPARK-4592: + * + * (1) Master A fails and Worker attempts to reconnect to all masters + * (2) Master B takes over and notifies Worker + * (3) Worker responds by registering with Master B + * (4) Meanwhile, Worker's previous reconnection attempt reaches Master B, + * causing the same Worker to register with Master B twice + * + * Instead, if we only register with the known active master, we can assume that the + * old master must have died because another master has taken over. Note that this is + * still not safe if the old master recovers within this interval, but this is a much + * less likely scenario. + */ + if (master != null) { + master ! RegisterWorker( + workerId, host, port, cores, memory, webUi.boundPort, publicAddress) + } else { + // We are retrying the initial registration + tryRegisterAllMasters() + } + // We have exceeded the initial registration retry threshold + // All retries from now on should use a higher interval if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) { registrationRetryTimer.foreach(_.cancel()) registrationRetryTimer = Some { context.system.scheduler.schedule(PROLONGED_REGISTRATION_RETRY_INTERVAL, - PROLONGED_REGISTRATION_RETRY_INTERVAL)(retryConnectToMaster) + PROLONGED_REGISTRATION_RETRY_INTERVAL, self, ReregisterWithMaster) } } } else { @@ -216,7 +262,7 @@ private[spark] class Worker( connectionAttemptCount = 0 registrationRetryTimer = Some { context.system.scheduler.schedule(INITIAL_REGISTRATION_RETRY_INTERVAL, - INITIAL_REGISTRATION_RETRY_INTERVAL)(retryConnectToMaster) + INITIAL_REGISTRATION_RETRY_INTERVAL, self, ReregisterWithMaster) } case Some(_) => logInfo("Not spawning another attempt to register with the master, since there is an" + @@ -253,7 +299,7 @@ private[spark] class Worker( val isAppStillRunning = executors.values.map(_.appId).contains(appIdFromDir) dir.isDirectory && !isAppStillRunning && !Utils.doesDirectoryContainAnyNewFiles(dir, APP_DATA_RETENTION_SECS) - }.foreach { dir => + }.foreach { dir => logInfo(s"Removing directory: ${dir.getPath}") Utils.deleteRecursively(dir) } @@ -298,8 +344,29 @@ private[spark] class Worker( throw new IOException("Failed to create directory " + executorDir) } - val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_, - self, workerId, host, sparkHome, executorDir, akkaUrl, conf, ExecutorState.LOADING) + // Create local dirs for the executor. These are passed to the executor via the + // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the + // application finishes. 
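The appDirectories bookkeeping introduced next boils down to a create-once, reuse, delete-on-finish pattern per application. A condensed, hedged sketch of that pattern follows; createScratchDirs is a hypothetical stand-in for the Utils.createDirectory calls used in the patch:

    import java.nio.file.{Files, Paths}
    import scala.collection.mutable

    // Hypothetical stand-in for Utils.createDirectory over each configured root dir.
    def createScratchDirs(rootDirs: Seq[String]): Seq[String] =
      rootDirs.map(root => Files.createTempDirectory(Paths.get(root), "spark-").toString)

    val appDirectories = mutable.HashMap.empty[String, Seq[String]]

    // The first executor launched for an application creates its local dirs; later
    // executors of the same application reuse them (exported via SPARK_LOCAL_DIRS).
    def localDirsFor(appId: String, rootDirs: Seq[String]): Seq[String] =
      appDirectories.getOrElseUpdate(appId, createScratchDirs(rootDirs))

    // Once the application finishes and its last executor exits, the worker drops the
    // entry and deletes the directories (the patch uses Utils.deleteRecursively).
    def cleanupApplication(appId: String): Unit =
      appDirectories.remove(appId).foreach { dirs =>
        dirs.foreach(dir => println(s"would recursively delete $dir"))
      }
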
+ val appLocalDirs = appDirectories.get(appId).getOrElse { + Utils.getOrCreateLocalRootDirs(conf).map { dir => + Utils.createDirectory(dir).getAbsolutePath() + }.toSeq + } + appDirectories(appId) = appLocalDirs + val manager = new ExecutorRunner( + appId, + execId, + appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)), + cores_, + memory_, + self, + workerId, + host, + sparkHome, + executorDir, + akkaUrl, + conf, + appLocalDirs, ExecutorState.LOADING) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ @@ -336,6 +403,7 @@ private[spark] class Worker( message.map(" message " + _).getOrElse("") + exitStatus.map(" exitStatus " + _).getOrElse("")) } + maybeCleanupApplication(appId) } case KillExecutor(masterUrl, appId, execId) => @@ -354,7 +422,14 @@ private[spark] class Worker( case LaunchDriver(driverId, driverDesc) => { logInfo(s"Asked to launch driver $driverId") - val driver = new DriverRunner(conf, driverId, workDir, sparkHome, driverDesc, self, akkaUrl) + val driver = new DriverRunner( + conf, + driverId, + workDir, + sparkHome, + driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)), + self, + akkaUrl) drivers(driverId) = driver driver.start() @@ -396,12 +471,18 @@ private[spark] class Worker( logInfo(s"$x Disassociated !") masterDisconnected() - case RequestWorkerState => { + case RequestWorkerState => sender ! WorkerStateResponse(host, port, workerId, executors.values.toList, finishedExecutors.values.toList, drivers.values.toList, finishedDrivers.values.toList, activeMasterUrl, cores, memory, coresUsed, memoryUsed, activeMasterWebUiUrl) - } + + case ReregisterWithMaster => + reregisterWithMaster() + + case ApplicationFinished(id) => + finishedApps += id + maybeCleanupApplication(id) } private def masterDisconnected() { @@ -410,6 +491,19 @@ private[spark] class Worker( registerWithMaster() } + private def maybeCleanupApplication(id: String): Unit = { + val shouldCleanup = finishedApps.contains(id) && !executors.values.exists(_.appId == id) + if (shouldCleanup) { + finishedApps -= id + appDirectories.remove(id).foreach { dirList => + logInfo(s"Cleaning up local directories for application $id") + dirList.foreach { dir => + Utils.deleteRecursively(new File(dir)) + } + } + } + } + def generateWorkerId(): String = { "worker-%s-%s-%d".format(createDateFormat.format(new Date), host, port) } @@ -419,6 +513,7 @@ private[spark] class Worker( registrationRetryTimer.foreach(_.cancel()) executors.values.foreach(_.kill()) drivers.values.foreach(_.kill()) + shuffleService.stop() webUi.stop() metricsSystem.stop() } @@ -441,7 +536,8 @@ private[spark] object Worker extends Logging { cores: Int, memory: Int, masterUrls: Array[String], - workDir: String, workerNumber: Option[Int] = None): (ActorSystem, Int) = { + workDir: String, + workerNumber: Option[Int] = None): (ActorSystem, Int) = { // The LocalSparkCluster runs multiple local sparkWorkerX actor systems val conf = new SparkConf @@ -450,9 +546,32 @@ private[spark] object Worker extends Logging { val securityMgr = new SecurityManager(conf) val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port, conf = conf, securityManager = securityMgr) + val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_, AkkaUtils.protocol(actorSystem))) actorSystem.actorOf(Props(classOf[Worker], host, boundPort, webUiPort, cores, memory, - masterUrls, systemName, actorName, workDir, conf, securityMgr), name = actorName) + masterAkkaUrls, systemName, actorName, workDir, 
conf, securityMgr), name = actorName) (actorSystem, boundPort) } + private[spark] def isUseLocalNodeSSLConfig(cmd: Command): Boolean = { + val pattern = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r + val result = cmd.javaOpts.collectFirst { + case pattern(_result) => _result.toBoolean + } + result.getOrElse(false) + } + + private[spark] def maybeUpdateSSLSettings(cmd: Command, conf: SparkConf): Command = { + val prefix = "spark.ssl." + val useNLC = "spark.ssl.useNodeLocalConf" + if (isUseLocalNodeSSLConfig(cmd)) { + val newJavaOpts = cmd.javaOpts + .filter(opt => !opt.startsWith(s"-D$prefix")) ++ + conf.getAll.collect { case (key, value) if key.startsWith(prefix) => s"-D$key=$value" } :+ + s"-D$useNLC=true" + cmd.copy(javaOpts = newJavaOpts) + } else { + cmd + } + } + } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index b07942a9ca729..7ac81a2d87efd 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -50,7 +50,6 @@ class WorkerWebUI( attachHandler(createStaticHandler(WorkerWebUI.STATIC_RESOURCE_BASE, "/static")) attachHandler(createServletHandler("/log", (request: HttpServletRequest) => logPage.renderLog(request), worker.securityMgr)) - worker.metricsSystem.getServletHandlers.foreach(attachHandler) } } diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index c40a3e16675ad..bc72c8970319c 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -38,7 +38,7 @@ private[spark] class CoarseGrainedExecutorBackend( executorId: String, hostPort: String, cores: Int, - sparkProperties: Seq[(String, String)]) + env: SparkEnv) extends Actor with ActorLogReceive with ExecutorBackend with Logging { Utils.checkHostPort(hostPort, "Expected hostport") @@ -56,9 +56,8 @@ private[spark] class CoarseGrainedExecutorBackend( override def receiveWithLogging = { case RegisteredExecutor => logInfo("Successfully registered with driver") - // Make this host instead of hostPort ? - executor = new Executor(executorId, Utils.parseHostPort(hostPort)._1, sparkProperties, - false) + val (hostname, _) = Utils.parseHostPort(hostPort) + executor = new Executor(executorId, hostname, env, isLocal = false) case RegisterExecutorFailed(message) => logError("Slave registration failed: " + message) @@ -69,10 +68,11 @@ private[spark] class CoarseGrainedExecutorBackend( logError("Received LaunchTask command but executor was null") System.exit(1) } else { - val ser = SparkEnv.get.closureSerializer.newInstance() + val ser = env.closureSerializer.newInstance() val taskDesc = ser.deserialize[TaskDescription](data.value) logInfo("Got assigned task " + taskDesc.taskId) - executor.launchTask(this, taskDesc.taskId, taskDesc.name, taskDesc.serializedTask) + executor.launchTask(this, taskId = taskDesc.taskId, attemptNumber = taskDesc.attemptNumber, + taskDesc.name, taskDesc.serializedTask) } case KillTask(taskId, _, interruptThread) => @@ -84,8 +84,12 @@ private[spark] class CoarseGrainedExecutorBackend( } case x: DisassociatedEvent => - logError(s"Driver $x disassociated! 
Shutting down.") - System.exit(1) + if (x.remoteAddress == driver.anchorPath.address) { + logError(s"Driver $x disassociated! Shutting down.") + System.exit(1) + } else { + logWarning(s"Received irrelevant DisassociatedEvent $x") + } case StopExecutor => logInfo("Driver commanded a shutdown") @@ -119,7 +123,11 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { val executorConf = new SparkConf val port = executorConf.getInt("spark.executor.port", 0) val (fetcher, _) = AkkaUtils.createActorSystem( - "driverPropsFetcher", hostname, port, executorConf, new SecurityManager(executorConf)) + "driverPropsFetcher", + hostname, + port, + executorConf, + new SecurityManager(executorConf)) val driver = fetcher.actorSelection(driverUrl) val timeout = AkkaUtils.askTimeout(executorConf) val fut = Patterns.ask(driver, RetrieveSparkProps, timeout) @@ -127,20 +135,33 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { Seq[(String, String)](("spark.app.id", appId)) fetcher.shutdown() - // Create a new ActorSystem using driver's Spark properties to run the backend. - val driverConf = new SparkConf().setAll(props) - val (actorSystem, boundPort) = AkkaUtils.createActorSystem( - "sparkExecutor", hostname, port, driverConf, new SecurityManager(driverConf)) - // set it + // Create SparkEnv using properties we fetched from the driver. + val driverConf = new SparkConf() + for ((key, value) <- props) { + // this is required for SSL in standalone mode + if (SparkConf.isExecutorStartupConf(key)) { + driverConf.setIfMissing(key, value) + } else { + driverConf.set(key, value) + } + } + val env = SparkEnv.createExecutorEnv( + driverConf, executorId, hostname, port, cores, isLocal = false) + + // SparkEnv sets spark.driver.port so it shouldn't be 0 anymore. + val boundPort = env.conf.getInt("spark.executor.port", 0) + assert(boundPort != 0) + + // Start the CoarseGrainedExecutorBackend actor. val sparkHostPort = hostname + ":" + boundPort - actorSystem.actorOf( + env.actorSystem.actorOf( Props(classOf[CoarseGrainedExecutorBackend], - driverUrl, executorId, sparkHostPort, cores, props), + driverUrl, executorId, sparkHostPort, cores, env), name = "Executor") workerUrl.foreach { url => - actorSystem.actorOf(Props(classOf[WorkerWatcher], url), name = "WorkerWatcher") + env.actorSystem.actorOf(Props(classOf[WorkerWatcher], url), name = "WorkerWatcher") } - actorSystem.awaitTermination() + env.actorSystem.awaitTermination() } } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 616c7e6a46368..312bb3a1daaa3 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -26,23 +26,29 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal +import akka.actor.Props + import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.scheduler._ import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{StorageLevel, TaskResultBlockId} -import org.apache.spark.util.{AkkaUtils, Utils} +import org.apache.spark.util.{SparkUncaughtExceptionHandler, AkkaUtils, Utils} /** * Spark executor used with Mesos, YARN, and the standalone scheduler. + * In coarse-grained mode, an existing actor system is provided. 
*/ private[spark] class Executor( executorId: String, - slaveHostname: String, - properties: Seq[(String, String)], + executorHostname: String, + env: SparkEnv, isLocal: Boolean = false) extends Logging { + + logInfo(s"Starting executor ID $executorId on host $executorHostname") + // Application dependencies (added through SparkContext) that we've fetched so far on this node. // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() @@ -50,43 +56,36 @@ private[spark] class Executor( private val EMPTY_BYTE_BUFFER = ByteBuffer.wrap(new Array[Byte](0)) + private val conf = env.conf + @volatile private var isStopped = false // No ip or host:port - just hostname - Utils.checkHost(slaveHostname, "Expected executed slave to be a hostname") + Utils.checkHost(executorHostname, "Expected executed slave to be a hostname") // must not have port specified. - assert (0 == Utils.parseHostPort(slaveHostname)._2) + assert (0 == Utils.parseHostPort(executorHostname)._2) // Make sure the local hostname we report matches the cluster scheduler's name for this host - Utils.setCustomHostname(slaveHostname) - - // Set spark.* properties from executor arg - val conf = new SparkConf(true) - conf.setAll(properties) + Utils.setCustomHostname(executorHostname) if (!isLocal) { // Setup an uncaught exception handler for non-local mode. // Make any thread terminations due to uncaught exceptions kill the entire // executor process to avoid surprising stalls. - Thread.setDefaultUncaughtExceptionHandler(ExecutorUncaughtExceptionHandler) + Thread.setDefaultUncaughtExceptionHandler(SparkUncaughtExceptionHandler) } val executorSource = new ExecutorSource(this, executorId) - // Initialize Spark environment (using system properties read above) - conf.set("spark.executor.id", "executor." + executorId) - private val env = { - if (!isLocal) { - val _env = SparkEnv.create(conf, executorId, slaveHostname, 0, - isDriver = false, isLocal = false) - SparkEnv.set(_env) - _env.metricsSystem.registerSource(executorSource) - _env - } else { - SparkEnv.get - } + if (!isLocal) { + env.metricsSystem.registerSource(executorSource) + env.blockManager.initialize(conf.getAppId) } + // Create an actor for receiving RPCs from the driver + private val executorActor = env.actorSystem.actorOf( + Props(new ExecutorActor(executorId)), "ExecutorActor") + // Create our ClassLoader // do this after SparkEnv creation so can access the SecurityManager private val urlClassLoader = createClassLoader() @@ -99,6 +98,9 @@ private[spark] class Executor( // to send the result back. 
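The akkaFrameSize and maxResultSize limits defined next feed the new result-routing logic in TaskRunner.run() a few hunks below. A condensed sketch of that decision, with simplified names rather than the literal method, assuming the same three inputs (Utils.getMaxResultSize(conf), the Akka frame size, and Akka's reserved bytes):

    sealed trait ResultRoute
    case object Dropped         extends ResultRoute // only the result size is reported back
    case object ViaBlockManager extends ResultRoute // IndirectTaskResult pointing at a block
    case object Direct          extends ResultRoute // serialized result in the status update

    def routeResult(resultSize: Long, maxResultSize: Long,
                    akkaFrameSize: Long, reservedBytes: Long): ResultRoute =
      if (maxResultSize > 0 && resultSize > maxResultSize) Dropped
      else if (resultSize >= akkaFrameSize - reservedBytes) ViaBlockManager
      else Direct
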
private val akkaFrameSize = AkkaUtils.maxFrameSizeBytes(conf) + // Limit of bytes for total size of results (default is 1GB) + private val maxResultSize = Utils.getMaxResultSize(conf) + // Start worker thread pool val threadPool = Utils.newDaemonCachedThreadPool("Executor task launch worker") @@ -108,8 +110,13 @@ private[spark] class Executor( startDriverHeartbeater() def launchTask( - context: ExecutorBackend, taskId: Long, taskName: String, serializedTask: ByteBuffer) { - val tr = new TaskRunner(context, taskId, taskName, serializedTask) + context: ExecutorBackend, + taskId: Long, + attemptNumber: Int, + taskName: String, + serializedTask: ByteBuffer) { + val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName, + serializedTask) runningTasks.put(taskId, tr) threadPool.execute(tr) } @@ -123,6 +130,7 @@ private[spark] class Executor( def stop() { env.metricsSystem.report() + env.actorSystem.stop(executorActor) isStopped = true threadPool.shutdown() if (!isLocal) { @@ -130,13 +138,20 @@ private[spark] class Executor( } } + private def gcTime = ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum + class TaskRunner( - execBackend: ExecutorBackend, val taskId: Long, taskName: String, serializedTask: ByteBuffer) + execBackend: ExecutorBackend, + val taskId: Long, + val attemptNumber: Int, + taskName: String, + serializedTask: ByteBuffer) extends Runnable { @volatile private var killed = false @volatile var task: Task[Any] = _ @volatile var attemptedTask: Option[Task[Any]] = None + @volatile var startGCTime: Long = _ def kill(interruptThread: Boolean) { logInfo(s"Executor is trying to kill $taskName (TID $taskId)") @@ -147,17 +162,15 @@ private[spark] class Executor( } override def run() { - val startTime = System.currentTimeMillis() + val deserializeStartTime = System.currentTimeMillis() Thread.currentThread.setContextClassLoader(replClassLoader) - val ser = SparkEnv.get.closureSerializer.newInstance() + val ser = env.closureSerializer.newInstance() logInfo(s"Running $taskName (TID $taskId)") execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var taskStart: Long = 0 - def gcTime = ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum - val startGCTime = gcTime + startGCTime = gcTime try { - Accumulators.clear() val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask) updateDependencies(taskFiles, taskJars) task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) @@ -178,7 +191,7 @@ private[spark] class Executor( // Run the actual task and measure its runtime. taskStart = System.currentTimeMillis() - val value = task.run(taskId.toInt) + val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber) val taskFinish = System.currentTimeMillis() // If the task has been killed, let's fail it. 
@@ -186,16 +199,16 @@ private[spark] class Executor( throw new TaskKilledException } - val resultSer = SparkEnv.get.serializer.newInstance() + val resultSer = env.serializer.newInstance() val beforeSerialization = System.currentTimeMillis() val valueBytes = resultSer.serialize(value) val afterSerialization = System.currentTimeMillis() for (m <- task.metrics) { - m.executorDeserializeTime = taskStart - startTime - m.executorRunTime = taskFinish - taskStart - m.jvmGCTime = gcTime - startGCTime - m.resultSerializationTime = afterSerialization - beforeSerialization + m.setExecutorDeserializeTime(taskStart - deserializeStartTime) + m.setExecutorRunTime(taskFinish - taskStart) + m.setJvmGCTime(gcTime - startGCTime) + m.setResultSerializationTime(afterSerialization - beforeSerialization) } val accumUpdates = Accumulators.values @@ -205,25 +218,27 @@ private[spark] class Executor( val resultSize = serializedDirectResult.limit // directSend = sending directly back to the driver - val (serializedResult, directSend) = { - if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) { + val serializedResult = { + if (maxResultSize > 0 && resultSize > maxResultSize) { + logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " + + s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + + s"dropping it.") + ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize)) + } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) { val blockId = TaskResultBlockId(taskId) env.blockManager.putBytes( blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER) - (ser.serialize(new IndirectTaskResult[Any](blockId)), false) + logInfo( + s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)") + ser.serialize(new IndirectTaskResult[Any](blockId, resultSize)) } else { - (serializedDirectResult, true) + logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver") + serializedDirectResult } } execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) - if (directSend) { - logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver") - } else { - logInfo( - s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)") - } } catch { case ffe: FetchFailedException => { val reason = ffe.toTaskEndReason @@ -244,16 +259,16 @@ private[spark] class Executor( val serviceTime = System.currentTimeMillis() - taskStart val metrics = attemptedTask.flatMap(t => t.metrics) for (m <- metrics) { - m.executorRunTime = serviceTime - m.jvmGCTime = gcTime - startGCTime + m.setExecutorRunTime(serviceTime) + m.setJvmGCTime(gcTime - startGCTime) } - val reason = ExceptionFailure(t.getClass.getName, t.getMessage, t.getStackTrace, metrics) + val reason = new ExceptionFailure(t, metrics) execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) // Don't forcibly exit unless the exception was inherently fatal, to avoid // stopping other tasks unnecessarily. 
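For context on the SparkUncaughtExceptionHandler call that follows: the ExecutorUncaughtExceptionHandler deleted further below shows the fail-fast policy, restated here as a hedged sketch. The exit codes are the constants removed from ExecutorExitCode in this patch; judging by the new SparkExitCode import they presumably live there now.

    // Restatement of the deleted handler's policy: kill the whole executor process on any
    // uncaught exception, with an exit code that tells the master what happened.
    def exitCodeFor(exception: Throwable): Int = exception match {
      case _: OutOfMemoryError => 52 // OOM
      case _                   => 50 // UNCAUGHT_EXCEPTION
    }
    // If even logging or exiting fails, the handler halts with 51 (UNCAUGHT_EXCEPTION_TWICE).
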
if (Utils.isFatalError(t)) { - ExecutorUncaughtExceptionHandler.uncaughtException(t) + SparkUncaughtExceptionHandler.uncaughtException(t) } } } finally { @@ -261,6 +276,8 @@ private[spark] class Executor( env.shuffleMemoryManager.releaseMemoryForThisThread() // Release memory used by this thread for unrolling blocks env.blockManager.memoryStore.releaseUnrollMemoryForThisThread() + // Release memory used by this thread for accumulators + Accumulators.clear() runningTasks.remove(taskId) } } @@ -317,19 +334,21 @@ private[spark] class Executor( * SparkContext. Also adds any new JARs we fetched to the class loader. */ private def updateDependencies(newFiles: HashMap[String, Long], newJars: HashMap[String, Long]) { - val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) synchronized { // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, env.securityManager, - hadoopConf) + // Fetch file with useCache mode, close cache for local mode. + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, + env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentFiles(name) = timestamp } for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, env.securityManager, - hadoopConf) + // Fetch file with useCache mode, close cache for local mode. + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, + env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentJars(name) = timestamp // Add it to our class loader val localName = name.split("/").last @@ -356,10 +375,15 @@ private[spark] class Executor( while (!isStopped) { val tasksMetrics = new ArrayBuffer[(Long, TaskMetrics)]() + val curGCTime = gcTime + for (taskRunner <- runningTasks.values()) { - if (!taskRunner.attemptedTask.isEmpty) { + if (taskRunner.attemptedTask.nonEmpty) { Option(taskRunner.task).flatMap(_.metrics).foreach { metrics => - metrics.updateShuffleReadMetrics + metrics.updateShuffleReadMetrics() + metrics.updateInputMetrics() + metrics.setJvmGCTime(curGCTime - taskRunner.startGCTime) + if (isLocal) { // JobProgressListener will hold an reference of it during // onExecutorMetricsUpdate(), then JobProgressListener can not see diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorActor.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorActor.scala new file mode 100644 index 0000000000000..41925f7e97e84 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorActor.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.executor + +import akka.actor.Actor +import org.apache.spark.Logging + +import org.apache.spark.util.{Utils, ActorLogReceive} + +/** + * Driver -> Executor message to trigger a thread dump. + */ +private[spark] case object TriggerThreadDump + +/** + * Actor that runs inside of executors to enable driver -> executor RPC. + */ +private[spark] +class ExecutorActor(executorId: String) extends Actor with ActorLogReceive with Logging { + + override def receiveWithLogging = { + case TriggerThreadDump => + sender ! Utils.getThreadDump() + } + +} diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala index 38be2c58b333f..52862ae0ca5e4 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala @@ -17,6 +17,8 @@ package org.apache.spark.executor +import org.apache.spark.util.SparkExitCode._ + /** * These are exit codes that executors should use to provide the master with information about * executor failures assuming that cluster management framework can capture the exit codes (but @@ -27,16 +29,6 @@ package org.apache.spark.executor */ private[spark] object ExecutorExitCode { - /** The default uncaught exception handler was reached. */ - val UNCAUGHT_EXCEPTION = 50 - - /** The default uncaught exception handler was called and an exception was encountered while - logging the exception. */ - val UNCAUGHT_EXCEPTION_TWICE = 51 - - /** The default uncaught exception handler was reached, and the uncaught exception was an - OutOfMemoryError. */ - val OOM = 52 /** DiskStore failed to create a local temporary directory after many attempts. */ val DISK_STORE_FAILED_TO_CREATE_DIR = 53 diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorUncaughtExceptionHandler.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorUncaughtExceptionHandler.scala deleted file mode 100644 index b0e984c03964c..0000000000000 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorUncaughtExceptionHandler.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.executor - -import org.apache.spark.Logging -import org.apache.spark.util.Utils - -/** - * The default uncaught exception handler for Executors terminates the whole process, to avoid - * getting into a bad state indefinitely. Since Executors are relatively lightweight, it's better - * to fail fast when things go wrong. - */ -private[spark] object ExecutorUncaughtExceptionHandler - extends Thread.UncaughtExceptionHandler with Logging { - - override def uncaughtException(thread: Thread, exception: Throwable) { - try { - logError("Uncaught exception in thread " + thread, exception) - - // We may have been called from a shutdown hook. If so, we must not call System.exit(). - // (If we do, we will deadlock.) - if (!Utils.inShutdown()) { - if (exception.isInstanceOf[OutOfMemoryError]) { - System.exit(ExecutorExitCode.OOM) - } else { - System.exit(ExecutorExitCode.UNCAUGHT_EXCEPTION) - } - } - } catch { - case oom: OutOfMemoryError => Runtime.getRuntime.halt(ExecutorExitCode.OOM) - case t: Throwable => Runtime.getRuntime.halt(ExecutorExitCode.UNCAUGHT_EXCEPTION_TWICE) - } - } - - def uncaughtException(exception: Throwable) { - uncaughtException(Thread.currentThread(), exception) - } -} diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala index bca0b152268ad..cfd672e1d8a97 100644 --- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala @@ -19,13 +19,16 @@ package org.apache.spark.executor import java.nio.ByteBuffer +import scala.collection.JavaConversions._ + import org.apache.mesos.protobuf.ByteString -import org.apache.mesos.{Executor => MesosExecutor, ExecutorDriver, MesosExecutorDriver, MesosNativeLibrary} +import org.apache.mesos.{Executor => MesosExecutor, ExecutorDriver, MesosExecutorDriver} import org.apache.mesos.Protos.{TaskStatus => MesosTaskStatus, _} -import org.apache.spark.{Logging, TaskState} +import org.apache.spark.{Logging, TaskState, SparkConf, SparkEnv} import org.apache.spark.TaskState.TaskState import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.scheduler.cluster.mesos.{MesosTaskLaunchData} import org.apache.spark.util.{SignalLogger, Utils} private[spark] class MesosExecutorBackend @@ -50,22 +53,39 @@ private[spark] class MesosExecutorBackend executorInfo: ExecutorInfo, frameworkInfo: FrameworkInfo, slaveInfo: SlaveInfo) { - logInfo("Registered with Mesos as executor ID " + executorInfo.getExecutorId.getValue) + + // Get num cores for this task from ExecutorInfo, created in MesosSchedulerBackend. 
+ val cpusPerTask = executorInfo.getResourcesList + .find(_.getName == "cpus") + .map(_.getScalar.getValue.toInt) + .getOrElse(0) + val executorId = executorInfo.getExecutorId.getValue + + logInfo(s"Registered with Mesos as executor ID $executorId with $cpusPerTask cpus") this.driver = driver val properties = Utils.deserialize[Array[(String, String)]](executorInfo.getData.toByteArray) ++ Seq[(String, String)](("spark.app.id", frameworkInfo.getId.getValue)) + val conf = new SparkConf(loadDefaults = true).setAll(properties) + val port = conf.getInt("spark.executor.port", 0) + val env = SparkEnv.createExecutorEnv( + conf, executorId, slaveInfo.getHostname, port, cpusPerTask, isLocal = false) + executor = new Executor( - executorInfo.getExecutorId.getValue, + executorId, slaveInfo.getHostname, - properties) + env) } override def launchTask(d: ExecutorDriver, taskInfo: TaskInfo) { val taskId = taskInfo.getTaskId.getValue.toLong + val taskData = MesosTaskLaunchData.fromByteString(taskInfo.getData) if (executor == null) { logError("Received launchTask but executor was null") } else { - executor.launchTask(this, taskId, taskInfo.getName, taskInfo.getData.asReadOnlyByteBuffer) + SparkHadoopUtil.get.runAsSparkUser { () => + executor.launchTask(this, taskId = taskId, attemptNumber = taskData.attemptNumber, + taskInfo.getName, taskData.serializedTask) + } } } @@ -97,11 +117,8 @@ private[spark] class MesosExecutorBackend private[spark] object MesosExecutorBackend extends Logging { def main(args: Array[String]) { SignalLogger.register(log) - SparkHadoopUtil.get.runAsSparkUser { () => - MesosNativeLibrary.load() - // Create a new Executor and start it running - val runner = new MesosExecutorBackend() - new MesosExecutorDriver(runner).run() - } + // Create a new Executor and start it running + val runner = new MesosExecutorBackend() + new MesosExecutorDriver(runner).run() } } diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 3e49b6235aff3..97912c68c5982 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -17,6 +17,10 @@ package org.apache.spark.executor +import java.util.concurrent.atomic.AtomicLong + +import org.apache.spark.executor.DataReadMethod.DataReadMethod + import scala.collection.mutable.ArrayBuffer import org.apache.spark.annotation.DeveloperApi @@ -39,48 +43,84 @@ class TaskMetrics extends Serializable { /** * Host's name the task runs on */ - var hostname: String = _ - + private var _hostname: String = _ + def hostname = _hostname + private[spark] def setHostname(value: String) = _hostname = value + /** * Time taken on the executor to deserialize this task */ - var executorDeserializeTime: Long = _ - + private var _executorDeserializeTime: Long = _ + def executorDeserializeTime = _executorDeserializeTime + private[spark] def setExecutorDeserializeTime(value: Long) = _executorDeserializeTime = value + + /** * Time the executor spends actually running the task (including fetching shuffle data) */ - var executorRunTime: Long = _ - + private var _executorRunTime: Long = _ + def executorRunTime = _executorRunTime + private[spark] def setExecutorRunTime(value: Long) = _executorRunTime = value + /** * The number of bytes this task transmitted back to the driver as the TaskResult */ - var resultSize: Long = _ + private var _resultSize: Long = _ + def resultSize = _resultSize + private[spark] def 
setResultSize(value: Long) = _resultSize = value + /** * Amount of time the JVM spent in garbage collection while executing this task */ - var jvmGCTime: Long = _ + private var _jvmGCTime: Long = _ + def jvmGCTime = _jvmGCTime + private[spark] def setJvmGCTime(value: Long) = _jvmGCTime = value /** * Amount of time spent serializing the task result */ - var resultSerializationTime: Long = _ + private var _resultSerializationTime: Long = _ + def resultSerializationTime = _resultSerializationTime + private[spark] def setResultSerializationTime(value: Long) = _resultSerializationTime = value /** * The number of in-memory bytes spilled by this task */ - var memoryBytesSpilled: Long = _ + private var _memoryBytesSpilled: Long = _ + def memoryBytesSpilled = _memoryBytesSpilled + private[spark] def incMemoryBytesSpilled(value: Long) = _memoryBytesSpilled += value + private[spark] def decMemoryBytesSpilled(value: Long) = _memoryBytesSpilled -= value /** * The number of on-disk bytes spilled by this task */ - var diskBytesSpilled: Long = _ + private var _diskBytesSpilled: Long = _ + def diskBytesSpilled = _diskBytesSpilled + def incDiskBytesSpilled(value: Long) = _diskBytesSpilled += value + def decDiskBytesSpilled(value: Long) = _diskBytesSpilled -= value /** * If this task reads from a HadoopRDD or from persisted data, metrics on how much data was read * are stored here. */ - var inputMetrics: Option[InputMetrics] = None + private var _inputMetrics: Option[InputMetrics] = None + + def inputMetrics = _inputMetrics + + /** + * This should only be used when recreating TaskMetrics, not when updating input metrics in + * executors + */ + private[spark] def setInputMetrics(inputMetrics: Option[InputMetrics]) { + _inputMetrics = inputMetrics + } + + /** + * If this task writes data externally (e.g. to a distributed filesystem), metrics on how much + * data was written are stored here. + */ + var outputMetrics: Option[OutputMetrics] = None /** * If this task reads from shuffle output, metrics on getting shuffle data will be collected here. @@ -127,19 +167,47 @@ class TaskMetrics extends Serializable { readMetrics } + /** + * Returns the input metrics object that the task should use. Currently, if + * there exists an input metric with the same readMethod, we return that one + * so the caller can accumulate bytes read. If the readMethod is different + * than previously seen by this task, we return a new InputMetric but don't + * record it. + * + * Once https://issues.apache.org/jira/browse/SPARK-5225 is addressed, + * we can store all the different inputMetrics (one per readMethod). + */ + private[spark] def getInputMetricsForReadMethod(readMethod: DataReadMethod): + InputMetrics =synchronized { + _inputMetrics match { + case None => + val metrics = new InputMetrics(readMethod) + _inputMetrics = Some(metrics) + metrics + case Some(metrics @ InputMetrics(method)) if method == readMethod => + metrics + case Some(InputMetrics(method)) => + new InputMetrics(readMethod) + } + } + /** * Aggregates shuffle read metrics for all registered dependencies into shuffleReadMetrics. 
*/ private[spark] def updateShuffleReadMetrics() = synchronized { val merged = new ShuffleReadMetrics() for (depMetrics <- depsShuffleReadMetrics) { - merged.fetchWaitTime += depMetrics.fetchWaitTime - merged.localBlocksFetched += depMetrics.localBlocksFetched - merged.remoteBlocksFetched += depMetrics.remoteBlocksFetched - merged.remoteBytesRead += depMetrics.remoteBytesRead + merged.incFetchWaitTime(depMetrics.fetchWaitTime) + merged.incLocalBlocksFetched(depMetrics.localBlocksFetched) + merged.incRemoteBlocksFetched(depMetrics.remoteBlocksFetched) + merged.incRemoteBytesRead(depMetrics.remoteBytesRead) } _shuffleReadMetrics = Some(merged) } + + private[spark] def updateInputMetrics() = synchronized { + inputMetrics.foreach(_.updateBytesRead()) + } } private[spark] object TaskMetrics { @@ -157,51 +225,115 @@ object DataReadMethod extends Enumeration with Serializable { val Memory, Disk, Hadoop, Network = Value } +/** + * :: DeveloperApi :: + * Method by which output data was written. + */ +@DeveloperApi +object DataWriteMethod extends Enumeration with Serializable { + type DataWriteMethod = Value + val Hadoop = Value +} + /** * :: DeveloperApi :: * Metrics about reading input data. */ @DeveloperApi case class InputMetrics(readMethod: DataReadMethod.Value) { + + private val _bytesRead: AtomicLong = new AtomicLong() + /** * Total bytes read. */ - var bytesRead: Long = 0L -} + def bytesRead: Long = _bytesRead.get() + @volatile @transient var bytesReadCallback: Option[() => Long] = None + /** + * Adds additional bytes read for this read method. + */ + def addBytesRead(bytes: Long) = { + _bytesRead.addAndGet(bytes) + } + + /** + * Invoke the bytesReadCallback and mutate bytesRead. + */ + def updateBytesRead() { + bytesReadCallback.foreach { c => + _bytesRead.set(c()) + } + } + + /** + * Register a function that can be called to get up-to-date information on how many bytes the task + * has read from an input source. + */ + def setBytesReadCallback(f: Option[() => Long]) { + bytesReadCallback = f + } +} /** * :: DeveloperApi :: - * Metrics pertaining to shuffle data read in a given task. + * Metrics about writing output data. */ @DeveloperApi -class ShuffleReadMetrics extends Serializable { +case class OutputMetrics(writeMethod: DataWriteMethod.Value) { /** - * Number of blocks fetched in this shuffle by this task (remote or local) + * Total bytes written */ - def totalBlocksFetched: Int = remoteBlocksFetched + localBlocksFetched + private var _bytesWritten: Long = _ + def bytesWritten = _bytesWritten + private[spark] def setBytesWritten(value : Long) = _bytesWritten = value +} +/** + * :: DeveloperApi :: + * Metrics pertaining to shuffle data read in a given task. 
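+ *
+ * One instance is registered per shuffle dependency through TaskMetrics, and the
+ * per-dependency values are folded into the task-level shuffleReadMetrics by
+ * updateShuffleReadMetrics(); totalBlocksFetched, for example, is simply
+ * remoteBlocksFetched + localBlocksFetched.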
+ */ +@DeveloperApi +class ShuffleReadMetrics extends Serializable { /** * Number of remote blocks fetched in this shuffle by this task */ - var remoteBlocksFetched: Int = _ - + private var _remoteBlocksFetched: Int = _ + def remoteBlocksFetched = _remoteBlocksFetched + private[spark] def incRemoteBlocksFetched(value: Int) = _remoteBlocksFetched += value + private[spark] def defRemoteBlocksFetched(value: Int) = _remoteBlocksFetched -= value + /** * Number of local blocks fetched in this shuffle by this task */ - var localBlocksFetched: Int = _ + private var _localBlocksFetched: Int = _ + def localBlocksFetched = _localBlocksFetched + private[spark] def incLocalBlocksFetched(value: Int) = _localBlocksFetched += value + private[spark] def defLocalBlocksFetched(value: Int) = _localBlocksFetched -= value + /** * Time the task spent waiting for remote shuffle blocks. This only includes the time * blocking on shuffle input data. For instance if block B is being fetched while the task is * still not finished processing block A, it is not considered to be blocking on block B. */ - var fetchWaitTime: Long = _ - + private var _fetchWaitTime: Long = _ + def fetchWaitTime = _fetchWaitTime + private[spark] def incFetchWaitTime(value: Long) = _fetchWaitTime += value + private[spark] def decFetchWaitTime(value: Long) = _fetchWaitTime -= value + /** * Total number of remote bytes read from the shuffle by this task */ - var remoteBytesRead: Long = _ + private var _remoteBytesRead: Long = _ + def remoteBytesRead = _remoteBytesRead + private[spark] def incRemoteBytesRead(value: Long) = _remoteBytesRead += value + private[spark] def decRemoteBytesRead(value: Long) = _remoteBytesRead -= value + + /** + * Number of blocks fetched in this shuffle by this task (remote or local) + */ + def totalBlocksFetched = _remoteBlocksFetched + _localBlocksFetched } /** @@ -213,10 +345,18 @@ class ShuffleWriteMetrics extends Serializable { /** * Number of bytes written for the shuffle by this task */ - @volatile var shuffleBytesWritten: Long = _ - + @volatile private var _shuffleBytesWritten: Long = _ + def shuffleBytesWritten = _shuffleBytesWritten + private[spark] def incShuffleBytesWritten(value: Long) = _shuffleBytesWritten += value + private[spark] def decShuffleBytesWritten(value: Long) = _shuffleBytesWritten -= value + /** * Time the task spent blocking on writes to disk or buffer cache, in nanoseconds */ - @volatile var shuffleWriteTime: Long = _ + @volatile private var _shuffleWriteTime: Long = _ + def shuffleWriteTime= _shuffleWriteTime + private[spark] def incShuffleWriteTime(value: Long) = _shuffleWriteTime += value + private[spark] def decShuffleWriteTime(value: Long) = _shuffleWriteTime -= value + + } diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala new file mode 100644 index 0000000000000..c219d21fbefa9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.input + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.io.{BytesWritable, LongWritable} +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} +import org.apache.spark.deploy.SparkHadoopUtil + +/** + * Custom Input Format for reading and splitting flat binary files that contain records, + * each of which are a fixed size in bytes. The fixed record size is specified through + * a parameter recordLength in the Hadoop configuration. + */ +private[spark] object FixedLengthBinaryInputFormat { + /** Property name to set in Hadoop JobConfs for record length */ + val RECORD_LENGTH_PROPERTY = "org.apache.spark.input.FixedLengthBinaryInputFormat.recordLength" + + /** Retrieves the record length property from a Hadoop configuration */ + def getRecordLength(context: JobContext): Int = { + SparkHadoopUtil.get.getConfigurationFromJobContext(context).get(RECORD_LENGTH_PROPERTY).toInt + } +} + +private[spark] class FixedLengthBinaryInputFormat + extends FileInputFormat[LongWritable, BytesWritable] { + + private var recordLength = -1 + + /** + * Override of isSplitable to ensure initial computation of the record length + */ + override def isSplitable(context: JobContext, filename: Path): Boolean = { + if (recordLength == -1) { + recordLength = FixedLengthBinaryInputFormat.getRecordLength(context) + } + if (recordLength <= 0) { + println("record length is less than 0, file cannot be split") + false + } else { + true + } + } + + /** + * This input format overrides computeSplitSize() to make sure that each split + * only contains full records. Each InputSplit passed to FixedLengthBinaryRecordReader + * will start at the first byte of a record, and the last byte will the last byte of a record. 
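+ *
+ * For example, assuming a default split size of 67108864 bytes (64 MB) and a record length of
+ * 100 bytes, the returned split size is floor(67108864 / 100) * 100 = 67108800 bytes, so no
+ * record straddles two splits.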
+ */ + override def computeSplitSize(blockSize: Long, minSize: Long, maxSize: Long): Long = { + val defaultSize = super.computeSplitSize(blockSize, minSize, maxSize) + // If the default size is less than the length of a record, make it equal to it + // Otherwise, make sure the split size is as close to possible as the default size, + // but still contains a complete set of records, with the first record + // starting at the first byte in the split and the last record ending with the last byte + if (defaultSize < recordLength) { + recordLength.toLong + } else { + (Math.floor(defaultSize / recordLength) * recordLength).toLong + } + } + + /** + * Create a FixedLengthBinaryRecordReader + */ + override def createRecordReader(split: InputSplit, context: TaskAttemptContext) + : RecordReader[LongWritable, BytesWritable] = { + new FixedLengthBinaryRecordReader + } +} diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala new file mode 100644 index 0000000000000..67a96925da019 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.input + +import java.io.IOException + +import org.apache.hadoop.fs.FSDataInputStream +import org.apache.hadoop.io.compress.CompressionCodecFactory +import org.apache.hadoop.io.{BytesWritable, LongWritable} +import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext} +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.spark.deploy.SparkHadoopUtil + +/** + * FixedLengthBinaryRecordReader is returned by FixedLengthBinaryInputFormat. + * It uses the record length set in FixedLengthBinaryInputFormat to + * read one record at a time from the given InputSplit. + * + * Each call to nextKeyValue() updates the LongWritable key and BytesWritable value. 
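+ *
+ * A minimal driver-side sketch of how this reader is exercised (assuming a SparkContext named
+ * sc and an illustrative input path; the record length must first be set under
+ * FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY):
+ * {{{
+ *   val hadoopConf = new org.apache.hadoop.conf.Configuration()
+ *   hadoopConf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, 100)
+ *   val records = sc.newAPIHadoopFile(
+ *     "hdfs://namenode/data/fixed.bin",
+ *     classOf[FixedLengthBinaryInputFormat],
+ *     classOf[org.apache.hadoop.io.LongWritable],
+ *     classOf[org.apache.hadoop.io.BytesWritable],
+ *     hadoopConf)
+ * }}}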
+ * + * key = record index (Long) + * value = the record itself (BytesWritable) + */ +private[spark] class FixedLengthBinaryRecordReader + extends RecordReader[LongWritable, BytesWritable] { + + private var splitStart: Long = 0L + private var splitEnd: Long = 0L + private var currentPosition: Long = 0L + private var recordLength: Int = 0 + private var fileInputStream: FSDataInputStream = null + private var recordKey: LongWritable = null + private var recordValue: BytesWritable = null + + override def close() { + if (fileInputStream != null) { + fileInputStream.close() + } + } + + override def getCurrentKey: LongWritable = { + recordKey + } + + override def getCurrentValue: BytesWritable = { + recordValue + } + + override def getProgress: Float = { + splitStart match { + case x if x == splitEnd => 0.0.toFloat + case _ => Math.min( + ((currentPosition - splitStart) / (splitEnd - splitStart)).toFloat, 1.0 + ).toFloat + } + } + + override def initialize(inputSplit: InputSplit, context: TaskAttemptContext) { + // the file input + val fileSplit = inputSplit.asInstanceOf[FileSplit] + + // the byte position this fileSplit starts at + splitStart = fileSplit.getStart + + // splitEnd byte marker that the fileSplit ends at + splitEnd = splitStart + fileSplit.getLength + + // the actual file we will be reading from + val file = fileSplit.getPath + // job configuration + val job = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + // check compression + val codec = new CompressionCodecFactory(job).getCodec(file) + if (codec != null) { + throw new IOException("FixedLengthRecordReader does not support reading compressed files") + } + // get the record length + recordLength = FixedLengthBinaryInputFormat.getRecordLength(context) + // get the filesystem + val fs = file.getFileSystem(job) + // open the File + fileInputStream = fs.open(file) + // seek to the splitStart position + fileInputStream.seek(splitStart) + // set our current position + currentPosition = splitStart + } + + override def nextKeyValue(): Boolean = { + if (recordKey == null) { + recordKey = new LongWritable() + } + // the key is a linear index of the record, given by the + // position the record starts divided by the record length + recordKey.set(currentPosition / recordLength) + // the recordValue to place the bytes into + if (recordValue == null) { + recordValue = new BytesWritable(new Array[Byte](recordLength)) + } + // read a record if the currentPosition is less than the split end + if (currentPosition < splitEnd) { + // setup a buffer to store the record + val buffer = recordValue.getBytes + fileInputStream.readFully(buffer) + // update our current position + currentPosition = currentPosition + recordLength + // return true + return true + } + false + } +} diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala new file mode 100644 index 0000000000000..593a62b3e3b32 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.input + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} + +import scala.collection.JavaConversions._ + +import com.google.common.io.ByteStreams +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} +import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} + +import org.apache.spark.annotation.Experimental +import org.apache.spark.deploy.SparkHadoopUtil + +/** + * A general format for reading whole files in as streams, byte arrays, + * or other functions to be added + */ +private[spark] abstract class StreamFileInputFormat[T] + extends CombineFileInputFormat[String, T] +{ + override protected def isSplitable(context: JobContext, file: Path): Boolean = false + + /** + * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API + * which is set through setMaxSplitSize + */ + def setMinPartitions(context: JobContext, minPartitions: Int) { + val files = listStatus(context) + val totalLen = files.map { file => + if (file.isDir) 0L else file.getLen + }.sum + + val maxSplitSize = Math.ceil(totalLen * 1.0 / files.length).toLong + super.setMaxSplitSize(maxSplitSize) + } + + def createRecordReader(split: InputSplit, taContext: TaskAttemptContext): RecordReader[String, T] + +} + +/** + * An abstract class of [[org.apache.hadoop.mapreduce.RecordReader RecordReader]] + * to reading files out as streams + */ +private[spark] abstract class StreamBasedRecordReader[T]( + split: CombineFileSplit, + context: TaskAttemptContext, + index: Integer) + extends RecordReader[String, T] { + + // True means the current file has been processed, then skip it. 
+ private var processed = false + + private var key = "" + private var value: T = null.asInstanceOf[T] + + override def initialize(split: InputSplit, context: TaskAttemptContext) = {} + override def close() = {} + + override def getProgress = if (processed) 1.0f else 0.0f + + override def getCurrentKey = key + + override def getCurrentValue = value + + override def nextKeyValue = { + if (!processed) { + val fileIn = new PortableDataStream(split, context, index) + value = parseStream(fileIn) + fileIn.close() // if it has not been open yet, close does nothing + key = fileIn.getPath + processed = true + true + } else { + false + } + } + + /** + * Parse the stream (and close it afterwards) and return the value as in type T + * @param inStream the stream to be read in + * @return the data formatted as + */ + def parseStream(inStream: PortableDataStream): T +} + +/** + * Reads the record in directly as a stream for other objects to manipulate and handle + */ +private[spark] class StreamRecordReader( + split: CombineFileSplit, + context: TaskAttemptContext, + index: Integer) + extends StreamBasedRecordReader[PortableDataStream](split, context, index) { + + def parseStream(inStream: PortableDataStream): PortableDataStream = inStream +} + +/** + * The format for the PortableDataStream files + */ +private[spark] class StreamInputFormat extends StreamFileInputFormat[PortableDataStream] { + override def createRecordReader(split: InputSplit, taContext: TaskAttemptContext) = { + new CombineFileRecordReader[String, PortableDataStream]( + split.asInstanceOf[CombineFileSplit], taContext, classOf[StreamRecordReader]) + } +} + +/** + * A class that allows DataStreams to be serialized and moved around by not creating them + * until they need to be read + * @note TaskAttemptContext is not serializable resulting in the confBytes construct + * @note CombineFileSplit is not serializable resulting in the splitBytes construct + */ +@Experimental +class PortableDataStream( + @transient isplit: CombineFileSplit, + @transient context: TaskAttemptContext, + index: Integer) + extends Serializable { + + // transient forces file to be reopened after being serialization + // it is also used for non-serializable classes + + @transient private var fileIn: DataInputStream = null + @transient private var isOpen = false + + private val confBytes = { + val baos = new ByteArrayOutputStream() + SparkHadoopUtil.get.getConfigurationFromJobContext(context). 
+ write(new DataOutputStream(baos)) + baos.toByteArray + } + + private val splitBytes = { + val baos = new ByteArrayOutputStream() + isplit.write(new DataOutputStream(baos)) + baos.toByteArray + } + + @transient private lazy val split = { + val bais = new ByteArrayInputStream(splitBytes) + val nsplit = new CombineFileSplit() + nsplit.readFields(new DataInputStream(bais)) + nsplit + } + + @transient private lazy val conf = { + val bais = new ByteArrayInputStream(confBytes) + val nconf = new Configuration() + nconf.readFields(new DataInputStream(bais)) + nconf + } + /** + * Calculate the path name independently of opening the file + */ + @transient private lazy val path = { + val pathp = split.getPath(index) + pathp.toString + } + + /** + * Create a new DataInputStream from the split and context + */ + def open(): DataInputStream = { + if (!isOpen) { + val pathp = split.getPath(index) + val fs = pathp.getFileSystem(conf) + fileIn = fs.open(pathp) + isOpen = true + } + fileIn + } + + /** + * Read the file as a byte array + */ + def toArray(): Array[Byte] = { + open() + val innerBuffer = ByteStreams.toByteArray(fileIn) + close() + innerBuffer + } + + /** + * Close the file (if it is currently open) + */ + def close() = { + if (isOpen) { + try { + fileIn.close() + isOpen = false + } catch { + case ioe: java.io.IOException => // do nothing + } + } + } + + def getPath(): String = path +} + diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala index 4cb450577796a..aaef7c74eea33 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala @@ -25,8 +25,6 @@ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat import org.apache.hadoop.mapreduce.RecordReader import org.apache.hadoop.mapreduce.TaskAttemptContext -import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader -import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit /** * A [[org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat CombineFileInputFormat]] for @@ -34,23 +32,26 @@ import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit * the value is the entire content of file. */ -private[spark] class WholeTextFileInputFormat extends CombineFileInputFormat[String, String] { +private[spark] class WholeTextFileInputFormat + extends CombineFileInputFormat[String, String] with Configurable { + override protected def isSplitable(context: JobContext, file: Path): Boolean = false override def createRecordReader( split: InputSplit, context: TaskAttemptContext): RecordReader[String, String] = { - new CombineFileRecordReader[String, String]( - split.asInstanceOf[CombineFileSplit], - context, - classOf[WholeTextFileRecordReader]) + val reader = + new ConfigurableCombineFileRecordReader(split, context, classOf[WholeTextFileRecordReader]) + reader.setConf(getConf) + reader } /** - * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API. 
+ * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API, + * which is set through setMaxSplitSize */ - def setMaxSplitSize(context: JobContext, minPartitions: Int) { + def setMinPartitions(context: JobContext, minPartitions: Int) { val files = listStatus(context) val totalLen = files.map { file => if (file.isDir) 0L else file.getLen diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala index 3564ab2e2a162..31bde8a78f3c6 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala @@ -17,13 +17,28 @@ package org.apache.spark.input +import org.apache.hadoop.conf.{Configuration, Configurable => HConfigurable} import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.io.Text +import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapreduce.InputSplit -import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit +import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader} import org.apache.hadoop.mapreduce.RecordReader import org.apache.hadoop.mapreduce.TaskAttemptContext +import org.apache.spark.deploy.SparkHadoopUtil + + +/** + * A trait to implement [[org.apache.hadoop.conf.Configurable Configurable]] interface. + */ +private[spark] trait Configurable extends HConfigurable { + private var conf: Configuration = _ + def setConf(c: Configuration) { + conf = c + } + def getConf: Configuration = conf +} /** * A [[org.apache.hadoop.mapreduce.RecordReader RecordReader]] for reading a single whole text file @@ -34,10 +49,11 @@ private[spark] class WholeTextFileRecordReader( split: CombineFileSplit, context: TaskAttemptContext, index: Integer) - extends RecordReader[String, String] { + extends RecordReader[String, String] with Configurable { private[this] val path = split.getPath(index) - private[this] val fs = path.getFileSystem(context.getConfiguration) + private[this] val fs = path.getFileSystem( + SparkHadoopUtil.get.getConfigurationFromJobContext(context)) // True means the current file has been processed, then skip it. private[this] var processed = false @@ -57,8 +73,16 @@ private[spark] class WholeTextFileRecordReader( override def nextKeyValue(): Boolean = { if (!processed) { + val conf = new Configuration + val factory = new CompressionCodecFactory(conf) + val codec = factory.getCodec(path) // infers from file ext. val fileIn = fs.open(path) - val innerBuffer = ByteStreams.toByteArray(fileIn) + val innerBuffer = if (codec != null) { + ByteStreams.toByteArray(codec.createInputStream(fileIn)) + } else { + ByteStreams.toByteArray(fileIn) + } + value = new Text(innerBuffer).toString Closeables.close(fileIn, false) processed = true @@ -68,3 +92,28 @@ private[spark] class WholeTextFileRecordReader( } } } + + +/** + * A [[org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader CombineFileRecordReader]] + * that can pass Hadoop Configuration to [[org.apache.hadoop.conf.Configurable Configurable]] + * RecordReaders. 
+ */ +private[spark] class ConfigurableCombineFileRecordReader[K, V]( + split: InputSplit, + context: TaskAttemptContext, + recordReaderClass: Class[_ <: RecordReader[K, V] with HConfigurable]) + extends CombineFileRecordReader[K, V]( + split.asInstanceOf[CombineFileSplit], + context, + recordReaderClass + ) with Configurable { + + override def initNextRecordReader(): Boolean = { + val r = super.initNextRecordReader() + if (r) { + this.curReader.asInstanceOf[HConfigurable].setConf(getConf) + } + r + } +} diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 1ac7f4e448eb1..f856890d279f4 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -21,11 +21,12 @@ import java.io.{InputStream, OutputStream} import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} -import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream} +import org.xerial.snappy.{Snappy, SnappyInputStream, SnappyOutputStream} import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils +import org.apache.spark.Logging /** * :: DeveloperApi :: @@ -44,25 +45,33 @@ trait CompressionCodec { def compressedInputStream(s: InputStream): InputStream } - private[spark] object CompressionCodec { + private val configKey = "spark.io.compression.codec" private val shortCompressionCodecNames = Map( "lz4" -> classOf[LZ4CompressionCodec].getName, "lzf" -> classOf[LZFCompressionCodec].getName, "snappy" -> classOf[SnappyCompressionCodec].getName) def createCodec(conf: SparkConf): CompressionCodec = { - createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC)) + createCodec(conf, conf.get(configKey, DEFAULT_COMPRESSION_CODEC)) } def createCodec(conf: SparkConf, codecName: String): CompressionCodec = { val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName) - val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader) - .getConstructor(classOf[SparkConf]) - ctor.newInstance(conf).asInstanceOf[CompressionCodec] + val codec = try { + val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader) + .getConstructor(classOf[SparkConf]) + Some(ctor.newInstance(conf).asInstanceOf[CompressionCodec]) + } catch { + case e: ClassNotFoundException => None + case e: IllegalArgumentException => None + } + codec.getOrElse(throw new IllegalArgumentException(s"Codec [$codecName] is not available. 
" + + s"Consider setting $configKey=$FALLBACK_COMPRESSION_CODEC")) } + val FALLBACK_COMPRESSION_CODEC = "lzf" val DEFAULT_COMPRESSION_CODEC = "snappy" val ALL_COMPRESSION_CODECS = shortCompressionCodecNames.values.toSeq } @@ -120,6 +129,12 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec { @DeveloperApi class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec { + try { + Snappy.getNativeLibraryVersion + } catch { + case e: Error => throw new IllegalArgumentException + } + override def compressedOutputStream(s: OutputStream): OutputStream = { val blockSize = conf.getInt("spark.io.compression.snappy.block.size", 32768) new SnappyOutputStream(s, blockSize) diff --git a/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala similarity index 79% rename from core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala rename to core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala index 0c47afae54c8b..21b782edd2a9e 100644 --- a/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala @@ -15,15 +15,24 @@ * limitations under the License. */ -package org.apache.hadoop.mapred +package org.apache.spark.mapred -private[apache] +import java.lang.reflect.Modifier + +import org.apache.hadoop.mapred.{TaskAttemptID, JobID, JobConf, JobContext, TaskAttemptContext} + +private[spark] trait SparkHadoopMapRedUtil { def newJobContext(conf: JobConf, jobId: JobID): JobContext = { val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext") val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID]) + // In Hadoop 1.0.x, JobContext is an interface, and JobContextImpl is package private. + // Make it accessible if it's not in order to access it. + if (!Modifier.isPublic(ctor.getModifiers)) { + ctor.setAccessible(true) + } ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } @@ -31,6 +40,10 @@ trait SparkHadoopMapRedUtil { val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl", "org.apache.hadoop.mapred.TaskAttemptContext") val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID]) + // See above + if (!Modifier.isPublic(ctor.getModifiers)) { + ctor.setAccessible(true) + } ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala similarity index 96% rename from core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala rename to core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala index 1fca5729c6092..3340673f91156 100644 --- a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala @@ -15,13 +15,14 @@ * limitations under the License. 
*/ -package org.apache.hadoop.mapreduce +package org.apache.spark.mapreduce import java.lang.{Boolean => JBoolean, Integer => JInteger} import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID} -private[apache] +private[spark] trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index 5dd67b0cbf683..83e8eb71260eb 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -76,22 +76,36 @@ private[spark] class MetricsSystem private ( private val sources = new mutable.ArrayBuffer[Source] private val registry = new MetricRegistry() + private var running: Boolean = false + // Treat MetricsServlet as a special sink as it should be exposed to add handlers to web ui private var metricsServlet: Option[MetricsServlet] = None - /** Get any UI handlers used by this metrics system. */ - def getServletHandlers = metricsServlet.map(_.getHandlers).getOrElse(Array()) + /** + * Get any UI handlers used by this metrics system; can only be called after start(). + */ + def getServletHandlers = { + require(running, "Can only call getServletHandlers on a running MetricsSystem") + metricsServlet.map(_.getHandlers).getOrElse(Array()) + } metricsConfig.initialize() def start() { + require(!running, "Attempting to start a MetricsSystem that is already running") + running = true registerSources() registerSinks() sinks.foreach(_.start) } def stop() { - sinks.foreach(_.stop) + if (running) { + sinks.foreach(_.stop) + } else { + logWarning("Stopping a MetricsSystem that is not running") + } + running = false } def report() { @@ -107,7 +121,7 @@ private[spark] class MetricsSystem private ( * @return An unique metric name for each combination of * application, executor/driver and metric source. */ - def buildRegistryName(source: Source): String = { + private[spark] def buildRegistryName(source: Source): String = { val appId = conf.getOption("spark.app.id") val executorId = conf.getOption("spark.executor.id") val defaultName = MetricRegistry.name(source.sourceName) @@ -116,8 +130,8 @@ private[spark] class MetricsSystem private ( if (appId.isDefined && executorId.isDefined) { MetricRegistry.name(appId.get, executorId.get, source.sourceName) } else { - // Only Driver and Executor are set spark.app.id and spark.executor.id. - // For instance, Master and Worker are not related to a specific application. + // Only Driver and Executor set spark.app.id and spark.executor.id. + // Other instance types, e.g. Master and Worker, are not related to a specific application. val warningMsg = s"Using default name $defaultName for source because %s is not set." 
if (appId.isEmpty) { logWarning(warningMsg.format("spark.app.id")) } if (executorId.isEmpty) { logWarning(warningMsg.format("spark.executor.id")) } @@ -144,7 +158,7 @@ private[spark] class MetricsSystem private ( }) } - def registerSources() { + private def registerSources() { val instConfig = metricsConfig.getInstance(instance) val sourceConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SOURCE_REGEX) @@ -160,7 +174,7 @@ private[spark] class MetricsSystem private ( } } - def registerSinks() { + private def registerSinks() { val instConfig = metricsConfig.getInstance(instance) val sinkConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SINK_REGEX) diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala index d7b5f5c40efae..2d25ebd66159f 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala @@ -22,7 +22,7 @@ import java.util.Properties import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry -import com.codahale.metrics.graphite.{Graphite, GraphiteReporter} +import com.codahale.metrics.graphite.{GraphiteUDP, Graphite, GraphiteReporter} import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem @@ -38,6 +38,7 @@ private[spark] class GraphiteSink(val property: Properties, val registry: Metric val GRAPHITE_KEY_PERIOD = "period" val GRAPHITE_KEY_UNIT = "unit" val GRAPHITE_KEY_PREFIX = "prefix" + val GRAPHITE_KEY_PROTOCOL = "protocol" def propertyToOption(prop: String): Option[String] = Option(property.getProperty(prop)) @@ -66,7 +67,11 @@ private[spark] class GraphiteSink(val property: Properties, val registry: Metric MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) - val graphite: Graphite = new Graphite(new InetSocketAddress(host, port)) + val graphite = propertyToOption(GRAPHITE_KEY_PROTOCOL).map(_.toLowerCase) match { + case Some("udp") => new GraphiteUDP(new InetSocketAddress(host, port)) + case Some("tcp") | None => new Graphite(new InetSocketAddress(host, port)) + case Some(p) => throw new Exception(s"Invalid Graphite protocol: $p") + } val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) .convertDurationsTo(TimeUnit.MILLISECONDS) diff --git a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala index e0e91724271c8..1745d52c81923 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala @@ -17,20 +17,20 @@ package org.apache.spark.network -import org.apache.spark.storage.StorageLevel - +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.storage.{BlockId, StorageLevel} +private[spark] trait BlockDataManager { /** - * Interface to get local block data. - * - * @return Some(buffer) if the block exists locally, and None if it doesn't. + * Interface to get local block data. Throws an exception if the block cannot be found or + * cannot be read successfully. */ - def getBlockData(blockId: String): Option[ManagedBuffer] + def getBlockData(blockId: BlockId): ManagedBuffer /** * Put the block locally, using the given storage level. 
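 *
 * For example, NettyBlockRpcServer invokes this when it handles an UploadBlock message,
 * after deserializing the StorageLevel carried in the message metadata.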
*/ - def putBlockData(blockId: String, data: ManagedBuffer, level: StorageLevel): Unit + def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit } diff --git a/core/src/main/scala/org/apache/spark/network/BlockFetchingListener.scala b/core/src/main/scala/org/apache/spark/network/BlockFetchingListener.scala deleted file mode 100644 index 34acaa563ca58..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/BlockFetchingListener.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network - -import java.util.EventListener - - -/** - * Listener callback interface for [[BlockTransferService.fetchBlocks]]. - */ -trait BlockFetchingListener extends EventListener { - - /** - * Called once per successfully fetched block. - */ - def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit - - /** - * Called upon failures. For each failure, this is called only once (i.e. not once per block). - */ - def onBlockFetchFailure(exception: Throwable): Unit -} diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index 84d991fa6808c..dcbda5a8515dd 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -17,13 +17,19 @@ package org.apache.spark.network -import scala.concurrent.{Await, Future} -import scala.concurrent.duration.Duration +import java.io.Closeable +import java.nio.ByteBuffer -import org.apache.spark.storage.StorageLevel +import scala.concurrent.{Promise, Await, Future} +import scala.concurrent.duration.Duration +import org.apache.spark.Logging +import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer} +import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener} +import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel} -abstract class BlockTransferService { +private[spark] +abstract class BlockTransferService extends ShuffleClient with Closeable with Logging { /** * Initialize the transfer service by giving it the BlockDataManager that can be used to fetch @@ -34,7 +40,7 @@ abstract class BlockTransferService { /** * Tear down the transfer service. */ - def stop(): Unit + def close(): Unit /** * Port number the service is listening on, available only after [[init]] is invoked. @@ -50,17 +56,15 @@ abstract class BlockTransferService { * Fetch a sequence of blocks from a remote node asynchronously, * available only after [[init]] is invoked. 
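 *
 * A straightforward way to consume this callback-based API is shown by fetchBlockSync below,
 * which adapts the BlockFetchingListener callbacks into a Promise and blocks on the resulting
 * Future for a single block.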
* - * Note that [[BlockFetchingListener.onBlockFetchSuccess]] is called once per block, - * while [[BlockFetchingListener.onBlockFetchFailure]] is called once per failure (not per block). - * * Note that this API takes a sequence so the implementation can batch requests, and does not * return a future so the underlying implementation can invoke onBlockFetchSuccess as soon as * the data of a block is fetched, rather than waiting for all blocks to be fetched. */ - def fetchBlocks( - hostName: String, + override def fetchBlocks( + host: String, port: Int, - blockIds: Seq[String], + execId: String, + blockIds: Array[String], listener: BlockFetchingListener): Unit /** @@ -69,7 +73,8 @@ abstract class BlockTransferService { def uploadBlock( hostname: String, port: Int, - blockId: String, + execId: String, + blockId: BlockId, blockData: ManagedBuffer, level: StorageLevel): Future[Unit] @@ -78,40 +83,23 @@ abstract class BlockTransferService { * * It is also only available after [[init]] is invoked. */ - def fetchBlockSync(hostName: String, port: Int, blockId: String): ManagedBuffer = { + def fetchBlockSync(host: String, port: Int, execId: String, blockId: String): ManagedBuffer = { // A monitor for the thread to wait on. - val lock = new Object - @volatile var result: Either[ManagedBuffer, Throwable] = null - fetchBlocks(hostName, port, Seq(blockId), new BlockFetchingListener { - override def onBlockFetchFailure(exception: Throwable): Unit = { - lock.synchronized { - result = Right(exception) - lock.notify() + val result = Promise[ManagedBuffer]() + fetchBlocks(host, port, execId, Array(blockId), + new BlockFetchingListener { + override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { + result.failure(exception) } - } - override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { - lock.synchronized { - result = Left(data) - lock.notify() - } - } - }) - - // Sleep until result is no longer null - lock.synchronized { - while (result == null) { - try { - lock.wait() - } catch { - case e: InterruptedException => + override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { + val ret = ByteBuffer.allocate(data.size.toInt) + ret.put(data.nioByteBuffer()) + ret.flip() + result.success(new NioManagedBuffer(ret)) } - } - } + }) - result match { - case Left(data) => data - case Right(e) => throw e - } + Await.result(result.future, Duration.Inf) } /** @@ -123,9 +111,10 @@ abstract class BlockTransferService { def uploadBlockSync( hostname: String, port: Int, - blockId: String, + execId: String, + blockId: BlockId, blockData: ManagedBuffer, level: StorageLevel): Unit = { - Await.result(uploadBlock(hostname, port, blockId, blockData, level), Duration.Inf) + Await.result(uploadBlock(hostname, port, execId, blockId, blockData, level), Duration.Inf) } } diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala deleted file mode 100644 index 4c9ca97a2a6b7..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network - -import java.io._ -import java.nio.ByteBuffer -import java.nio.channels.FileChannel -import java.nio.channels.FileChannel.MapMode - -import scala.util.Try - -import com.google.common.io.ByteStreams -import io.netty.buffer.{ByteBufInputStream, ByteBuf} - -import org.apache.spark.util.{ByteBufferInputStream, Utils} - - -/** - * This interface provides an immutable view for data in the form of bytes. The implementation - * should specify how the data is provided: - * - * - FileSegmentManagedBuffer: data backed by part of a file - * - NioByteBufferManagedBuffer: data backed by a NIO ByteBuffer - * - NettyByteBufManagedBuffer: data backed by a Netty ByteBuf - */ -sealed abstract class ManagedBuffer { - // Note that all the methods are defined with parenthesis because their implementations can - // have side effects (io operations). - - /** Number of bytes of the data. */ - def size: Long - - /** - * Exposes this buffer's data as an NIO ByteBuffer. Changing the position and limit of the - * returned ByteBuffer should not affect the content of this buffer. - */ - def nioByteBuffer(): ByteBuffer - - /** - * Exposes this buffer's data as an InputStream. The underlying implementation does not - * necessarily check for the length of bytes read, so the caller is responsible for making sure - * it does not go over the limit. - */ - def inputStream(): InputStream -} - - -/** - * A [[ManagedBuffer]] backed by a segment in a file - */ -final class FileSegmentManagedBuffer(val file: File, val offset: Long, val length: Long) - extends ManagedBuffer { - - /** - * Memory mapping is expensive and can destabilize the JVM (SPARK-1145, SPARK-3889). - * Avoid unless there's a good reason not to. - */ - private val MIN_MEMORY_MAP_BYTES = 2 * 1024 * 1024; - - override def size: Long = length - - override def nioByteBuffer(): ByteBuffer = { - var channel: FileChannel = null - try { - channel = new RandomAccessFile(file, "r").getChannel - // Just copy the buffer if it's sufficiently small, as memory mapping has a high overhead. 
- if (length < MIN_MEMORY_MAP_BYTES) { - val buf = ByteBuffer.allocate(length.toInt) - channel.read(buf, offset) - buf.flip() - buf - } else { - channel.map(MapMode.READ_ONLY, offset, length) - } - } catch { - case e: IOException => - Try(channel.size).toOption match { - case Some(fileLen) => - throw new IOException(s"Error in reading $this (actual file length $fileLen)", e) - case None => - throw new IOException(s"Error in opening $this", e) - } - } finally { - if (channel != null) { - Utils.tryLog(channel.close()) - } - } - } - - override def inputStream(): InputStream = { - var is: FileInputStream = null - try { - is = new FileInputStream(file) - is.skip(offset) - ByteStreams.limit(is, length) - } catch { - case e: IOException => - if (is != null) { - Utils.tryLog(is.close()) - } - Try(file.length).toOption match { - case Some(fileLen) => - throw new IOException(s"Error in reading $this (actual file length $fileLen)", e) - case None => - throw new IOException(s"Error in opening $this", e) - } - case e: Throwable => - if (is != null) { - Utils.tryLog(is.close()) - } - throw e - } - } - - override def toString: String = s"${getClass.getName}($file, $offset, $length)" -} - - -/** - * A [[ManagedBuffer]] backed by [[java.nio.ByteBuffer]]. - */ -final class NioByteBufferManagedBuffer(buf: ByteBuffer) extends ManagedBuffer { - - override def size: Long = buf.remaining() - - override def nioByteBuffer() = buf.duplicate() - - override def inputStream() = new ByteBufferInputStream(buf) -} - - -/** - * A [[ManagedBuffer]] backed by a Netty [[ByteBuf]]. - */ -final class NettyByteBufManagedBuffer(buf: ByteBuf) extends ManagedBuffer { - - override def size: Long = buf.readableBytes() - - override def nioByteBuffer() = buf.nioBuffer() - - override def inputStream() = new ByteBufInputStream(buf) - - // TODO(rxin): Promote this to top level ManagedBuffer interface and add documentation for it. - def release(): Unit = buf.release() -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala new file mode 100644 index 0000000000000..b089da8596e2b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.netty + +import java.nio.ByteBuffer + +import scala.collection.JavaConversions._ + +import org.apache.spark.Logging +import org.apache.spark.network.BlockDataManager +import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} +import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} +import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} +import org.apache.spark.serializer.Serializer +import org.apache.spark.storage.{BlockId, StorageLevel} + +/** + * Serves requests to open blocks by simply registering one chunk per block requested. + * Handles opening and uploading arbitrary BlockManager blocks. + * + * Opened blocks are registered with the "one-for-one" strategy, meaning each Transport-layer Chunk + * is equivalent to one Spark-level shuffle block. + */ +class NettyBlockRpcServer( + serializer: Serializer, + blockManager: BlockDataManager) + extends RpcHandler with Logging { + + private val streamManager = new OneForOneStreamManager() + + override def receive( + client: TransportClient, + messageBytes: Array[Byte], + responseContext: RpcResponseCallback): Unit = { + val message = BlockTransferMessage.Decoder.fromByteArray(messageBytes) + logTrace(s"Received request: $message") + + message match { + case openBlocks: OpenBlocks => + val blocks: Seq[ManagedBuffer] = + openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData) + val streamId = streamManager.registerStream(blocks.iterator) + logTrace(s"Registered streamId $streamId with ${blocks.size} buffers") + responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray) + + case uploadBlock: UploadBlock => + // StorageLevel is serialized as bytes using our JavaSerializer. + val level: StorageLevel = + serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata)) + val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) + blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level) + responseContext.onSuccess(new Array[Byte](0)) + } + } + + override def getStreamManager(): StreamManager = streamManager +} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala new file mode 100644 index 0000000000000..3f0950dae1f24 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.netty + +import scala.collection.JavaConversions._ +import scala.concurrent.{Future, Promise} + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.network._ +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.network.client.{TransportClientBootstrap, RpcResponseCallback, TransportClientFactory} +import org.apache.spark.network.sasl.{SaslRpcHandler, SaslClientBootstrap} +import org.apache.spark.network.server._ +import org.apache.spark.network.shuffle.{RetryingBlockFetcher, BlockFetchingListener, OneForOneBlockFetcher} +import org.apache.spark.network.shuffle.protocol.UploadBlock +import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.storage.{BlockId, StorageLevel} +import org.apache.spark.util.Utils + +/** + * A BlockTransferService that uses Netty to fetch a set of blocks at at time. + */ +class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManager, numCores: Int) + extends BlockTransferService { + + // TODO: Don't use Java serialization, use a more cross-version compatible serialization format. + private val serializer = new JavaSerializer(conf) + private val authEnabled = securityManager.isAuthenticationEnabled() + private val transportConf = SparkTransportConf.fromSparkConf(conf, numCores) + + private[this] var transportContext: TransportContext = _ + private[this] var server: TransportServer = _ + private[this] var clientFactory: TransportClientFactory = _ + private[this] var appId: String = _ + + override def init(blockDataManager: BlockDataManager): Unit = { + val (rpcHandler: RpcHandler, bootstrap: Option[TransportClientBootstrap]) = { + val nettyRpcHandler = new NettyBlockRpcServer(serializer, blockDataManager) + if (!authEnabled) { + (nettyRpcHandler, None) + } else { + (new SaslRpcHandler(nettyRpcHandler, securityManager), + Some(new SaslClientBootstrap(transportConf, conf.getAppId, securityManager))) + } + } + transportContext = new TransportContext(transportConf, rpcHandler) + clientFactory = transportContext.createClientFactory(bootstrap.toList) + server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0)) + appId = conf.getAppId + logInfo("Server created on " + server.getPort) + } + + override def fetchBlocks( + host: String, + port: Int, + execId: String, + blockIds: Array[String], + listener: BlockFetchingListener): Unit = { + logTrace(s"Fetch blocks from $host:$port (executor id $execId)") + try { + val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter { + override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) { + val client = clientFactory.createClient(host, port) + new OneForOneBlockFetcher(client, appId, execId, blockIds.toArray, listener).start() + } + } + + val maxRetries = transportConf.maxIORetries() + if (maxRetries > 0) { + // Note this Fetcher will correctly handle maxRetries == 0; we avoid it just in case there's + // a bug in this code. We should remove the if statement once we're sure of the stability. 
+ new RetryingBlockFetcher(transportConf, blockFetchStarter, blockIds, listener).start() + } else { + blockFetchStarter.createAndStart(blockIds, listener) + } + } catch { + case e: Exception => + logError("Exception while beginning fetchBlocks", e) + blockIds.foreach(listener.onBlockFetchFailure(_, e)) + } + } + + override def hostName: String = Utils.localHostName() + + override def port: Int = server.getPort + + override def uploadBlock( + hostname: String, + port: Int, + execId: String, + blockId: BlockId, + blockData: ManagedBuffer, + level: StorageLevel): Future[Unit] = { + val result = Promise[Unit]() + val client = clientFactory.createClient(hostname, port) + + // StorageLevel is serialized as bytes using our JavaSerializer. Everything else is encoded + // using our binary protocol. + val levelBytes = serializer.newInstance().serialize(level).array() + + // Convert or copy nio buffer into array in order to serialize it. + val nioBuffer = blockData.nioByteBuffer() + val array = if (nioBuffer.hasArray) { + nioBuffer.array() + } else { + val data = new Array[Byte](nioBuffer.remaining()) + nioBuffer.get(data) + data + } + + client.sendRpc(new UploadBlock(appId, execId, blockId.toString, levelBytes, array).toByteArray, + new RpcResponseCallback { + override def onSuccess(response: Array[Byte]): Unit = { + logTrace(s"Successfully uploaded block $blockId") + result.success() + } + override def onFailure(e: Throwable): Unit = { + logError(s"Error while uploading block $blockId", e) + result.failure(e) + } + }) + + result.future + } + + override def close(): Unit = { + server.close() + clientFactory.close() + } +} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala deleted file mode 100644 index b5870152c5a64..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyConfig.scala +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty - -import org.apache.spark.SparkConf - -/** - * A central location that tracks all the settings we exposed to users. - */ -private[spark] -class NettyConfig(conf: SparkConf) { - - /** Port the server listens on. Default to a random port. */ - private[netty] val serverPort = conf.getInt("spark.shuffle.io.port", 0) - - /** IO mode: nio, oio, epoll, or auto (try epoll first and then nio). */ - private[netty] val ioMode = conf.get("spark.shuffle.io.mode", "nio").toLowerCase - - /** Connect timeout in secs. Default 60 secs. */ - private[netty] val connectTimeoutMs = conf.getInt("spark.shuffle.io.connectionTimeout", 60) * 1000 - - /** - * Percentage of the desired amount of time spent for I/O in the child event loops. 
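Reader's note: a minimal usage sketch of the NettyBlockTransferService added above, not part of the patch. It assumes a BlockDataManager (in practice the BlockManager) is already available, and the host, port, executor id, and block id are placeholders. fetchBlocks is asynchronous, so real callers wait on the listener before shutting the service down.

    import org.apache.spark.{SecurityManager, SparkConf}
    import org.apache.spark.network.BlockDataManager
    import org.apache.spark.network.buffer.ManagedBuffer
    import org.apache.spark.network.netty.NettyBlockTransferService
    import org.apache.spark.network.shuffle.BlockFetchingListener

    def fetchExample(blockDataManager: BlockDataManager): Unit = {
      val conf = new SparkConf()
      val transfer = new NettyBlockTransferService(conf, new SecurityManager(conf), numCores = 2)
      transfer.init(blockDataManager)

      transfer.fetchBlocks("remote-host", 12345, "exec-7", Array("shuffle_0_0_0"),
        new BlockFetchingListener {
          override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit =
            println(s"fetched $blockId (${data.size} bytes)")
          override def onBlockFetchFailure(blockId: String, e: Throwable): Unit =
            println(s"failed to fetch $blockId: $e")
        })

      // In a real caller, only close once all outstanding fetches have completed.
      transfer.close()
    }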
- * Only applicable in nio and epoll. - */ - private[netty] val ioRatio = conf.getInt("spark.shuffle.io.netty.ioRatio", 80) - - /** Requested maximum length of the queue of incoming connections. */ - private[netty] val backLog: Option[Int] = conf.getOption("spark.shuffle.io.backLog").map(_.toInt) - - /** - * Receive buffer size (SO_RCVBUF). - * Note: the optimal size for receive buffer and send buffer should be - * latency * network_bandwidth. - * Assuming latency = 1ms, network_bandwidth = 10Gbps - * buffer size should be ~ 1.25MB - */ - private[netty] val receiveBuf: Option[Int] = - conf.getOption("spark.shuffle.io.sendBuffer").map(_.toInt) - - /** Send buffer size (SO_SNDBUF). */ - private[netty] val sendBuf: Option[Int] = - conf.getOption("spark.shuffle.io.sendBuffer").map(_.toInt) -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/PathResolver.scala b/core/src/main/scala/org/apache/spark/network/netty/PathResolver.scala deleted file mode 100644 index 0d7695072a7b1..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/PathResolver.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty - -import org.apache.spark.storage.{BlockId, FileSegment} - -trait PathResolver { - /** Get the file segment in which the given block resides. */ - def getBlockLocation(blockId: BlockId): FileSegment -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/SparkTransportConf.scala b/core/src/main/scala/org/apache/spark/network/netty/SparkTransportConf.scala new file mode 100644 index 0000000000000..cef203006d685 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/network/netty/SparkTransportConf.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.netty + +import org.apache.spark.SparkConf +import org.apache.spark.network.util.{TransportConf, ConfigProvider} + +/** + * Provides a utility for transforming from a SparkConf inside a Spark JVM (e.g., Executor, + * Driver, or a standalone shuffle service) into a TransportConf with details on our environment + * like the number of cores that are allocated to this JVM. + */ +object SparkTransportConf { + /** + * Specifies an upper bound on the number of Netty threads that Spark requires by default. + * In practice, only 2-4 cores should be required to transfer roughly 10 Gb/s, and each core + * that we use will have an initial overhead of roughly 32 MB of off-heap memory, which comes + * at a premium. + * + * Thus, this value should still retain maximum throughput and reduce wasted off-heap memory + * allocation. It can be overridden by setting the number of serverThreads and clientThreads + * manually in Spark's configuration. + */ + private val MAX_DEFAULT_NETTY_THREADS = 8 + + /** + * Utility for creating a [[TransportConf]] from a [[SparkConf]]. + * @param numUsableCores if nonzero, this will restrict the server and client threads to only + * use the given number of cores, rather than all of the machine's cores. + * This restriction will only occur if these properties are not already set. + */ + def fromSparkConf(_conf: SparkConf, numUsableCores: Int = 0): TransportConf = { + val conf = _conf.clone + + // Specify thread configuration based on our JVM's allocation of cores (rather than necessarily + // assuming we have all the machine's cores). + // NB: Only set if serverThreads/clientThreads not already set. + val numThreads = defaultNumThreads(numUsableCores) + conf.set("spark.shuffle.io.serverThreads", + conf.get("spark.shuffle.io.serverThreads", numThreads.toString)) + conf.set("spark.shuffle.io.clientThreads", + conf.get("spark.shuffle.io.clientThreads", numThreads.toString)) + + new TransportConf(new ConfigProvider { + override def get(name: String): String = conf.get(name) + }) + } + + /** + * Returns the default number of threads for both the Netty client and server thread pools. + * If numUsableCores is 0, we will use Runtime get an approximate number of available cores. + */ + private def defaultNumThreads(numUsableCores: Int): Int = { + val availableCores = + if (numUsableCores > 0) numUsableCores else Runtime.getRuntime.availableProcessors() + math.min(availableCores, MAX_DEFAULT_NETTY_THREADS) + } +} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala deleted file mode 100644 index e28219dd7745b..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/BlockClientListener.scala +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
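Reader's note: a short sketch, not part of the patch, of how the thread-count defaults in the SparkTransportConf object above behave.

    import org.apache.spark.SparkConf
    import org.apache.spark.network.netty.SparkTransportConf

    // With nothing configured, a JVM allotted 4 usable cores gets
    // min(4, MAX_DEFAULT_NETTY_THREADS = 8) = 4 threads for both pools.
    val defaults = SparkTransportConf.fromSparkConf(new SparkConf(), numUsableCores = 4)

    // Explicitly configured values win over the core-based default.
    val tuned = SparkTransportConf.fromSparkConf(
      new SparkConf().set("spark.shuffle.io.serverThreads", "16"),
      numUsableCores = 4)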
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -import java.util.EventListener - - -trait BlockClientListener extends EventListener { - - def onFetchSuccess(blockId: String, data: ReferenceCountedBuffer): Unit - - def onFetchFailure(blockId: String, errorMsg: String): Unit - -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala deleted file mode 100644 index 5aea7ba2f3673..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClient.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -import java.util.concurrent.TimeoutException - -import io.netty.bootstrap.Bootstrap -import io.netty.buffer.PooledByteBufAllocator -import io.netty.channel.socket.SocketChannel -import io.netty.channel.{ChannelFutureListener, ChannelFuture, ChannelInitializer, ChannelOption} -import io.netty.handler.codec.LengthFieldBasedFrameDecoder -import io.netty.handler.codec.string.StringEncoder -import io.netty.util.CharsetUtil - -import org.apache.spark.Logging - -/** - * Client for fetching data blocks from [[org.apache.spark.network.netty.server.BlockServer]]. - * Use [[BlockFetchingClientFactory]] to instantiate this client. - * - * The constructor blocks until a connection is successfully established. - * - * See [[org.apache.spark.network.netty.server.BlockServer]] for client/server protocol. - * - * Concurrency: thread safe and can be called from multiple threads. - */ -@throws[TimeoutException] -private[spark] -class BlockFetchingClient(factory: BlockFetchingClientFactory, hostname: String, port: Int) - extends Logging { - - private val handler = new BlockFetchingClientHandler - - /** Netty Bootstrap for creating the TCP connection. 
*/ - private val bootstrap: Bootstrap = { - val b = new Bootstrap - b.group(factory.workerGroup) - .channel(factory.socketChannelClass) - // Use pooled buffers to reduce temporary buffer allocation - .option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT) - // Disable Nagle's Algorithm since we don't want packets to wait - .option(ChannelOption.TCP_NODELAY, java.lang.Boolean.TRUE) - .option(ChannelOption.SO_KEEPALIVE, java.lang.Boolean.TRUE) - .option[Integer](ChannelOption.CONNECT_TIMEOUT_MILLIS, factory.conf.connectTimeoutMs) - - b.handler(new ChannelInitializer[SocketChannel] { - override def initChannel(ch: SocketChannel): Unit = { - ch.pipeline - .addLast("encoder", new StringEncoder(CharsetUtil.UTF_8)) - // maxFrameLength = 2G, lengthFieldOffset = 0, lengthFieldLength = 4 - .addLast("framedLengthDecoder", new LengthFieldBasedFrameDecoder(Int.MaxValue, 0, 4)) - .addLast("handler", handler) - } - }) - b - } - - /** Netty ChannelFuture for the connection. */ - private val cf: ChannelFuture = bootstrap.connect(hostname, port) - if (!cf.awaitUninterruptibly(factory.conf.connectTimeoutMs)) { - throw new TimeoutException( - s"Connecting to $hostname:$port timed out (${factory.conf.connectTimeoutMs} ms)") - } - - /** - * Ask the remote server for a sequence of blocks, and execute the callback. - * - * Note that this is asynchronous and returns immediately. Upstream caller should throttle the - * rate of fetching; otherwise we could run out of memory. - * - * @param blockIds sequence of block ids to fetch. - * @param listener callback to fire on fetch success / failure. - */ - def fetchBlocks(blockIds: Seq[String], listener: BlockClientListener): Unit = { - // It's best to limit the number of "write" calls since it needs to traverse the whole pipeline. - // It's also best to limit the number of "flush" calls since it requires system calls. - // Let's concatenate the string and then call writeAndFlush once. - // This is also why this implementation might be more efficient than multiple, separate - // fetch block calls. - var startTime: Long = 0 - logTrace { - startTime = System.nanoTime - s"Sending request $blockIds to $hostname:$port" - } - - blockIds.foreach { blockId => - handler.addRequest(blockId, listener) - } - - val writeFuture = cf.channel().writeAndFlush(blockIds.mkString("\n") + "\n") - writeFuture.addListener(new ChannelFutureListener { - override def operationComplete(future: ChannelFuture): Unit = { - if (future.isSuccess) { - logTrace { - val timeTaken = (System.nanoTime - startTime).toDouble / 1000000 - s"Sending request $blockIds to $hostname:$port took $timeTaken ms" - } - } else { - // Fail all blocks. 
- val errorMsg = - s"Failed to send request $blockIds to $hostname:$port: ${future.cause.getMessage}" - logError(errorMsg, future.cause) - blockIds.foreach { blockId => - listener.onFetchFailure(blockId, errorMsg) - handler.removeRequest(blockId) - } - } - } - }) - } - - def waitForClose(): Unit = { - cf.channel().closeFuture().sync() - } - - def close(): Unit = cf.channel().close() -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala deleted file mode 100644 index 2b28402c52b49..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientFactory.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -import io.netty.channel.epoll.{EpollEventLoopGroup, EpollSocketChannel} -import io.netty.channel.nio.NioEventLoopGroup -import io.netty.channel.oio.OioEventLoopGroup -import io.netty.channel.socket.nio.NioSocketChannel -import io.netty.channel.socket.oio.OioSocketChannel -import io.netty.channel.{EventLoopGroup, Channel} - -import org.apache.spark.SparkConf -import org.apache.spark.network.netty.NettyConfig -import org.apache.spark.util.Utils - -/** - * Factory for creating [[BlockFetchingClient]] by using createClient. This factory reuses - * the worker thread pool for Netty. - * - * Concurrency: createClient is safe to be called from multiple threads concurrently. - */ -private[spark] -class BlockFetchingClientFactory(val conf: NettyConfig) { - - def this(sparkConf: SparkConf) = this(new NettyConfig(sparkConf)) - - /** A thread factory so the threads are named (for debugging). */ - val threadFactory = Utils.namedThreadFactory("spark-shuffle-client") - - /** The following two are instantiated by the [[init]] method, depending ioMode. */ - var socketChannelClass: Class[_ <: Channel] = _ - var workerGroup: EventLoopGroup = _ - - init() - - /** Initialize [[socketChannelClass]] and [[workerGroup]] based on ioMode. */ - private def init(): Unit = { - def initOio(): Unit = { - socketChannelClass = classOf[OioSocketChannel] - workerGroup = new OioEventLoopGroup(0, threadFactory) - } - def initNio(): Unit = { - socketChannelClass = classOf[NioSocketChannel] - workerGroup = new NioEventLoopGroup(0, threadFactory) - } - def initEpoll(): Unit = { - socketChannelClass = classOf[EpollSocketChannel] - workerGroup = new EpollEventLoopGroup(0, threadFactory) - } - - conf.ioMode match { - case "nio" => initNio() - case "oio" => initOio() - case "epoll" => initEpoll() - case "auto" => - // For auto mode, first try epoll (only available on Linux), then nio. 
- try { - initEpoll() - } catch { - // TODO: Should we log the throwable? But that always happen on non-Linux systems. - // Perhaps the right thing to do is to check whether the system is Linux, and then only - // call initEpoll on Linux. - case e: Throwable => initNio() - } - } - } - - /** - * Create a new BlockFetchingClient connecting to the given remote host / port. - * - * This blocks until a connection is successfully established. - * - * Concurrency: This method is safe to call from multiple threads. - */ - def createClient(remoteHost: String, remotePort: Int): BlockFetchingClient = { - new BlockFetchingClient(this, remoteHost, remotePort) - } - - def stop(): Unit = { - if (workerGroup != null) { - workerGroup.shutdownGracefully() - } - } -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala deleted file mode 100644 index 83265b164299d..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/BlockFetchingClientHandler.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -import io.netty.buffer.ByteBuf -import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} - -import org.apache.spark.Logging - - -/** - * Handler that processes server responses. It uses the protocol documented in - * [[org.apache.spark.network.netty.server.BlockServer]]. - * - * Concurrency: thread safe and can be called from multiple threads. - */ -private[client] -class BlockFetchingClientHandler extends SimpleChannelInboundHandler[ByteBuf] with Logging { - - /** Tracks the list of outstanding requests and their listeners on success/failure. 
*/ - private val outstandingRequests = java.util.Collections.synchronizedMap { - new java.util.HashMap[String, BlockClientListener] - } - - def addRequest(blockId: String, listener: BlockClientListener): Unit = { - outstandingRequests.put(blockId, listener) - } - - def removeRequest(blockId: String): Unit = { - outstandingRequests.remove(blockId) - } - - override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = { - val errorMsg = s"Exception in connection from ${ctx.channel.remoteAddress}: ${cause.getMessage}" - logError(errorMsg, cause) - - // Fire the failure callback for all outstanding blocks - outstandingRequests.synchronized { - val iter = outstandingRequests.entrySet().iterator() - while (iter.hasNext) { - val entry = iter.next() - entry.getValue.onFetchFailure(entry.getKey, errorMsg) - } - outstandingRequests.clear() - } - - ctx.close() - } - - override def channelRead0(ctx: ChannelHandlerContext, in: ByteBuf) { - val totalLen = in.readInt() - val blockIdLen = in.readInt() - val blockIdBytes = new Array[Byte](math.abs(blockIdLen)) - in.readBytes(blockIdBytes) - val blockId = new String(blockIdBytes) - val blockSize = totalLen - math.abs(blockIdLen) - 4 - - def server = ctx.channel.remoteAddress.toString - - // blockIdLen is negative when it is an error message. - if (blockIdLen < 0) { - val errorMessageBytes = new Array[Byte](blockSize) - in.readBytes(errorMessageBytes) - val errorMsg = new String(errorMessageBytes) - logTrace(s"Received block $blockId ($blockSize B) with error $errorMsg from $server") - - val listener = outstandingRequests.get(blockId) - if (listener == null) { - // Ignore callback - logWarning(s"Got a response for block $blockId but it is not in our outstanding requests") - } else { - outstandingRequests.remove(blockId) - listener.onFetchFailure(blockId, errorMsg) - } - } else { - logTrace(s"Received block $blockId ($blockSize B) from $server") - - val listener = outstandingRequests.get(blockId) - if (listener == null) { - // Ignore callback - logWarning(s"Got a response for block $blockId but it is not in our outstanding requests") - } else { - outstandingRequests.remove(blockId) - listener.onFetchSuccess(blockId, new ReferenceCountedBuffer(in)) - } - } - } -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala b/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala deleted file mode 100644 index 9740ee64d1f2d..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/LazyInitIterator.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -/** - * A simple iterator that lazily initializes the underlying iterator. 
- * - * The use case is that sometimes we might have many iterators open at the same time, and each of - * the iterator might initialize its own buffer (e.g. decompression buffer, deserialization buffer). - * This could lead to too many buffers open. If this iterator is used, we lazily initialize those - * buffers. - */ -private[spark] -class LazyInitIterator(createIterator: => Iterator[Any]) extends Iterator[Any] { - - lazy val proxy = createIterator - - override def hasNext: Boolean = { - val gotNext = proxy.hasNext - if (!gotNext) { - close() - } - gotNext - } - - override def next(): Any = proxy.next() - - def close(): Unit = Unit -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala b/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala deleted file mode 100644 index ea1abf5eccc26..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/client/ReferenceCountedBuffer.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.client - -import java.io.InputStream -import java.nio.ByteBuffer - -import io.netty.buffer.{ByteBuf, ByteBufInputStream} - - -/** - * A buffer abstraction based on Netty's ByteBuf so we don't expose Netty. - * This is a Scala value class. - * - * The buffer's life cycle is NOT managed by the JVM, and thus requiring explicit declaration of - * reference by the retain method and release method. - */ -private[spark] -class ReferenceCountedBuffer(val underlying: ByteBuf) extends AnyVal { - - /** Return the nio ByteBuffer view of the underlying buffer. */ - def byteBuffer(): ByteBuffer = underlying.nioBuffer - - /** Creates a new input stream that starts from the current position of the buffer. */ - def inputStream(): InputStream = new ByteBufInputStream(underlying) - - /** Increment the reference counter by one. */ - def retain(): Unit = underlying.retain() - - /** Decrement the reference counter by one and release the buffer if the ref count is 0. */ - def release(): Unit = underlying.release() -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala deleted file mode 100644 index 162e9cc6828d4..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeader.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.server - -/** - * Header describing a block. This is used only in the server pipeline. - * - * [[BlockServerHandler]] creates this, and [[BlockHeaderEncoder]] encodes it. - * - * @param blockSize length of the block content, excluding the length itself. - * If positive, this is the header for a block (not part of the header). - * If negative, this is the header and content for an error message. - * @param blockId block id - * @param error some error message from reading the block - */ -private[server] -class BlockHeader(val blockSize: Int, val blockId: String, val error: Option[String] = None) diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala deleted file mode 100644 index 8e4dda4ef8595..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/server/BlockHeaderEncoder.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.server - -import io.netty.buffer.ByteBuf -import io.netty.channel.ChannelHandlerContext -import io.netty.handler.codec.MessageToByteEncoder - -/** - * A simple encoder for BlockHeader. See [[BlockServer]] for the server to client protocol. 
- */ -private[server] -class BlockHeaderEncoder extends MessageToByteEncoder[BlockHeader] { - override def encode(ctx: ChannelHandlerContext, msg: BlockHeader, out: ByteBuf): Unit = { - // message = message length (4 bytes) + block id length (4 bytes) + block id + block data - // message length = block id length (4 bytes) + size of block id + size of block data - val blockIdBytes = msg.blockId.getBytes - msg.error match { - case Some(errorMsg) => - val errorBytes = errorMsg.getBytes - out.writeInt(4 + blockIdBytes.length + errorBytes.size) - out.writeInt(-blockIdBytes.length) // use negative block id length to represent errors - out.writeBytes(blockIdBytes) // next is blockId itself - out.writeBytes(errorBytes) // error message - case None => - out.writeInt(4 + blockIdBytes.length + msg.blockSize) - out.writeInt(blockIdBytes.length) // First 4 bytes is blockId length - out.writeBytes(blockIdBytes) // next is blockId itself - // msg of size blockSize will be written by ServerHandler - } - } -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala deleted file mode 100644 index 7b2f9a8d4dfd0..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServer.scala +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.server - -import java.net.InetSocketAddress - -import io.netty.bootstrap.ServerBootstrap -import io.netty.buffer.PooledByteBufAllocator -import io.netty.channel.{ChannelFuture, ChannelInitializer, ChannelOption} -import io.netty.channel.epoll.{EpollEventLoopGroup, EpollServerSocketChannel} -import io.netty.channel.nio.NioEventLoopGroup -import io.netty.channel.oio.OioEventLoopGroup -import io.netty.channel.socket.SocketChannel -import io.netty.channel.socket.nio.NioServerSocketChannel -import io.netty.channel.socket.oio.OioServerSocketChannel -import io.netty.handler.codec.LineBasedFrameDecoder -import io.netty.handler.codec.string.StringDecoder -import io.netty.util.CharsetUtil - -import org.apache.spark.{Logging, SparkConf} -import org.apache.spark.network.netty.NettyConfig -import org.apache.spark.storage.BlockDataProvider -import org.apache.spark.util.Utils - - -/** - * Server for serving Spark data blocks. - * This should be used together with [[org.apache.spark.network.netty.client.BlockFetchingClient]]. - * - * Protocol for requesting blocks (client to server): - * One block id per line, e.g. to request 3 blocks: "block1\nblock2\nblock3\n" - * - * Protocol for sending blocks (server to client): - * frame-length (4 bytes), block-id-length (4 bytes), block-id, block-data. 
- * - * frame-length should not include the length of itself. - * If block-id-length is negative, then this is an error message rather than block-data. The real - * length is the absolute value of the frame-length. - * - */ -private[spark] -class BlockServer(conf: NettyConfig, dataProvider: BlockDataProvider) extends Logging { - - def this(sparkConf: SparkConf, dataProvider: BlockDataProvider) = { - this(new NettyConfig(sparkConf), dataProvider) - } - - def port: Int = _port - - def hostName: String = _hostName - - private var _port: Int = conf.serverPort - private var _hostName: String = "" - private var bootstrap: ServerBootstrap = _ - private var channelFuture: ChannelFuture = _ - - init() - - /** Initialize the server. */ - private def init(): Unit = { - bootstrap = new ServerBootstrap - val bossThreadFactory = Utils.namedThreadFactory("spark-shuffle-server-boss") - val workerThreadFactory = Utils.namedThreadFactory("spark-shuffle-server-worker") - - // Use only one thread to accept connections, and 2 * num_cores for worker. - def initNio(): Unit = { - val bossGroup = new NioEventLoopGroup(1, bossThreadFactory) - val workerGroup = new NioEventLoopGroup(0, workerThreadFactory) - workerGroup.setIoRatio(conf.ioRatio) - bootstrap.group(bossGroup, workerGroup).channel(classOf[NioServerSocketChannel]) - } - def initOio(): Unit = { - val bossGroup = new OioEventLoopGroup(1, bossThreadFactory) - val workerGroup = new OioEventLoopGroup(0, workerThreadFactory) - bootstrap.group(bossGroup, workerGroup).channel(classOf[OioServerSocketChannel]) - } - def initEpoll(): Unit = { - val bossGroup = new EpollEventLoopGroup(1, bossThreadFactory) - val workerGroup = new EpollEventLoopGroup(0, workerThreadFactory) - workerGroup.setIoRatio(conf.ioRatio) - bootstrap.group(bossGroup, workerGroup).channel(classOf[EpollServerSocketChannel]) - } - - conf.ioMode match { - case "nio" => initNio() - case "oio" => initOio() - case "epoll" => initEpoll() - case "auto" => - // For auto mode, first try epoll (only available on Linux), then nio. - try { - initEpoll() - } catch { - // TODO: Should we log the throwable? But that always happen on non-Linux systems. - // Perhaps the right thing to do is to check whether the system is Linux, and then only - // call initEpoll on Linux. - case e: Throwable => initNio() - } - } - - // Use pooled buffers to reduce temporary buffer allocation - bootstrap.option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT) - bootstrap.childOption(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT) - - // Various (advanced) user-configured settings. 
- conf.backLog.foreach { backLog => - bootstrap.option[java.lang.Integer](ChannelOption.SO_BACKLOG, backLog) - } - conf.receiveBuf.foreach { receiveBuf => - bootstrap.option[java.lang.Integer](ChannelOption.SO_RCVBUF, receiveBuf) - } - conf.sendBuf.foreach { sendBuf => - bootstrap.option[java.lang.Integer](ChannelOption.SO_SNDBUF, sendBuf) - } - - bootstrap.childHandler(new ChannelInitializer[SocketChannel] { - override def initChannel(ch: SocketChannel): Unit = { - ch.pipeline - .addLast("frameDecoder", new LineBasedFrameDecoder(1024)) // max block id length 1024 - .addLast("stringDecoder", new StringDecoder(CharsetUtil.UTF_8)) - .addLast("blockHeaderEncoder", new BlockHeaderEncoder) - .addLast("handler", new BlockServerHandler(dataProvider)) - } - }) - - channelFuture = bootstrap.bind(new InetSocketAddress(_port)) - channelFuture.sync() - - val addr = channelFuture.channel.localAddress.asInstanceOf[InetSocketAddress] - _port = addr.getPort - _hostName = addr.getHostName - } - - /** Shutdown the server. */ - def stop(): Unit = { - if (channelFuture != null) { - channelFuture.channel().close().awaitUninterruptibly() - channelFuture = null - } - if (bootstrap != null && bootstrap.group() != null) { - bootstrap.group().shutdownGracefully() - } - if (bootstrap != null && bootstrap.childGroup() != null) { - bootstrap.childGroup().shutdownGracefully() - } - bootstrap = null - } -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala deleted file mode 100644 index cc70bd0c5c477..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerChannelInitializer.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.server - -import io.netty.channel.ChannelInitializer -import io.netty.channel.socket.SocketChannel -import io.netty.handler.codec.LineBasedFrameDecoder -import io.netty.handler.codec.string.StringDecoder -import io.netty.util.CharsetUtil -import org.apache.spark.storage.BlockDataProvider - - -/** Channel initializer that sets up the pipeline for the BlockServer. 
*/ -private[netty] -class BlockServerChannelInitializer(dataProvider: BlockDataProvider) - extends ChannelInitializer[SocketChannel] { - - override def initChannel(ch: SocketChannel): Unit = { - ch.pipeline - .addLast("frameDecoder", new LineBasedFrameDecoder(1024)) // max block id length 1024 - .addLast("stringDecoder", new StringDecoder(CharsetUtil.UTF_8)) - .addLast("blockHeaderEncoder", new BlockHeaderEncoder) - .addLast("handler", new BlockServerHandler(dataProvider)) - } -} diff --git a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala b/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala deleted file mode 100644 index 40dd5e5d1a2ac..0000000000000 --- a/core/src/main/scala/org/apache/spark/network/netty/server/BlockServerHandler.scala +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty.server - -import java.io.FileInputStream -import java.nio.ByteBuffer -import java.nio.channels.FileChannel - -import io.netty.buffer.Unpooled -import io.netty.channel._ - -import org.apache.spark.Logging -import org.apache.spark.storage.{FileSegment, BlockDataProvider} - - -/** - * A handler that processes requests from clients and writes block data back. - * - * The messages should have been processed by a LineBasedFrameDecoder and a StringDecoder first - * so channelRead0 is called once per line (i.e. per block id). - */ -private[server] -class BlockServerHandler(dataProvider: BlockDataProvider) - extends SimpleChannelInboundHandler[String] with Logging { - - override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = { - logError(s"Exception in connection from ${ctx.channel.remoteAddress}", cause) - ctx.close() - } - - override def channelRead0(ctx: ChannelHandlerContext, blockId: String): Unit = { - def client = ctx.channel.remoteAddress.toString - - // A helper function to send error message back to the client. - def respondWithError(error: String): Unit = { - ctx.writeAndFlush(new BlockHeader(-1, blockId, Some(error))).addListener( - new ChannelFutureListener { - override def operationComplete(future: ChannelFuture) { - if (!future.isSuccess) { - // TODO: Maybe log the success case as well. - logError(s"Error sending error back to $client", future.cause) - ctx.close() - } - } - } - ) - } - - def writeFileSegment(segment: FileSegment): Unit = { - // Send error message back if the block is too large. Even though we are capable of sending - // large (2G+) blocks, the receiving end cannot handle it so let's fail fast. - // Once we fixed the receiving end to be able to process large blocks, this should be removed. - // Also make sure we update BlockHeaderEncoder to support length > 2G. 
- - // See [[BlockHeaderEncoder]] for the way length is encoded. - if (segment.length + blockId.length + 4 > Int.MaxValue) { - respondWithError(s"Block $blockId size ($segment.length) greater than 2G") - return - } - - var fileChannel: FileChannel = null - try { - fileChannel = new FileInputStream(segment.file).getChannel - } catch { - case e: Exception => - logError( - s"Error opening channel for $blockId in ${segment.file} for request from $client", e) - respondWithError(e.getMessage) - } - - // Found the block. Send it back. - if (fileChannel != null) { - // Write the header and block data. In the case of failures, the listener on the block data - // write should close the connection. - ctx.write(new BlockHeader(segment.length.toInt, blockId)) - - val region = new DefaultFileRegion(fileChannel, segment.offset, segment.length) - ctx.writeAndFlush(region).addListener(new ChannelFutureListener { - override def operationComplete(future: ChannelFuture) { - if (future.isSuccess) { - logTrace(s"Sent block $blockId (${segment.length} B) back to $client") - } else { - logError(s"Error sending block $blockId to $client; closing connection", future.cause) - ctx.close() - } - } - }) - } - } - - def writeByteBuffer(buf: ByteBuffer): Unit = { - ctx.write(new BlockHeader(buf.remaining, blockId)) - ctx.writeAndFlush(Unpooled.wrappedBuffer(buf)).addListener(new ChannelFutureListener { - override def operationComplete(future: ChannelFuture) { - if (future.isSuccess) { - logTrace(s"Sent block $blockId (${buf.remaining} B) back to $client") - } else { - logError(s"Error sending block $blockId to $client; closing connection", future.cause) - ctx.close() - } - } - }) - } - - logTrace(s"Received request from $client to fetch block $blockId") - - var blockData: Either[FileSegment, ByteBuffer] = null - - // First make sure we can find the block. If not, send error back to the user. 
- try { - blockData = dataProvider.getBlockData(blockId) - } catch { - case e: Exception => - logError(s"Error opening block $blockId for request from $client", e) - respondWithError(e.getMessage) - return - } - - blockData match { - case Left(segment) => writeFileSegment(segment) - case Right(buf) => writeByteBuffer(buf) - } - - } // end of channelRead0 -} diff --git a/core/src/main/scala/org/apache/spark/network/nio/Connection.scala b/core/src/main/scala/org/apache/spark/network/nio/Connection.scala index 4f6f5e235811d..c2d9578be7ebb 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/Connection.scala @@ -23,12 +23,13 @@ import java.nio.channels._ import java.util.concurrent.ConcurrentLinkedQueue import java.util.LinkedList -import org.apache.spark._ - import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal +import org.apache.spark._ +import org.apache.spark.network.sasl.{SparkSaslClient, SparkSaslServer} + private[nio] abstract class Connection(val channel: SocketChannel, val selector: Selector, val socketRemoteConnectionManagerId: ConnectionManagerId, val connectionId: ConnectionId, diff --git a/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala index bda4bf50932c3..ee22c6656e69e 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala @@ -18,20 +18,24 @@ package org.apache.spark.network.nio import java.io.IOException +import java.lang.ref.WeakReference import java.net._ import java.nio._ import java.nio.channels._ import java.nio.channels.spi._ import java.util.concurrent.atomic.AtomicInteger import java.util.concurrent.{LinkedBlockingDeque, ThreadPoolExecutor, TimeUnit} -import java.util.{Timer, TimerTask} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, SynchronizedMap, SynchronizedQueue} import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future, Promise} import scala.language.postfixOps +import com.google.common.base.Charsets.UTF_8 +import io.netty.util.{Timeout, TimerTask, HashedWheelTimer} + import org.apache.spark._ +import org.apache.spark.network.sasl.{SparkSaslClient, SparkSaslServer} import org.apache.spark.util.Utils import scala.util.Try @@ -74,13 +78,27 @@ private[nio] class ConnectionManager( } private val selector = SelectorProvider.provider.openSelector() - private val ackTimeoutMonitor = new Timer("AckTimeoutMonitor", true) - - private val ackTimeout = conf.getInt("spark.core.connection.ack.wait.timeout", 60) + private val ackTimeoutMonitor = + new HashedWheelTimer(Utils.namedThreadFactory("AckTimeoutMonitor")) + + private val ackTimeout = + conf.getInt("spark.core.connection.ack.wait.timeout", conf.getInt("spark.network.timeout", 120)) + + // Get the thread counts from the Spark Configuration. + // + // Even though the ThreadPoolExecutor constructor takes both a minimum and maximum value, + // we only query for the minimum value because we are using LinkedBlockingDeque. + // + // The JavaDoc for ThreadPoolExecutor points out that when using a LinkedBlockingDeque (which is + // an unbounded queue) no more than corePoolSize threads will ever be created, so only the "min" + // parameter is necessary. 
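Reader's note: a standalone Scala illustration, not from the patch, of the point made in the comment above. With an unbounded work queue, a ThreadPoolExecutor never grows beyond corePoolSize, which is why the configuration reads that follow consult only the .min settings and pass the same number as both core and maximum pool size.

    import java.util.concurrent.{LinkedBlockingDeque, ThreadPoolExecutor, TimeUnit}

    val poolSize = 20
    val pool = new ThreadPoolExecutor(
      poolSize,                            // corePoolSize
      poolSize,                            // maximumPoolSize: never reached with an unbounded queue
      60L, TimeUnit.SECONDS,               // keep-alive for idle threads
      new LinkedBlockingDeque[Runnable]()) // unbounded, so extra threads are never spawned

    (1 to 100).foreach { i =>
      pool.execute(new Runnable {
        override def run(): Unit = println(s"task $i on ${Thread.currentThread().getName}")
      })
    }
    pool.shutdown()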
+ private val handlerThreadCount = conf.getInt("spark.core.connection.handler.threads.min", 20) + private val ioThreadCount = conf.getInt("spark.core.connection.io.threads.min", 4) + private val connectThreadCount = conf.getInt("spark.core.connection.connect.threads.min", 1) private val handleMessageExecutor = new ThreadPoolExecutor( - conf.getInt("spark.core.connection.handler.threads.min", 20), - conf.getInt("spark.core.connection.handler.threads.max", 60), + handlerThreadCount, + handlerThreadCount, conf.getInt("spark.core.connection.handler.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable](), Utils.namedThreadFactory("handle-message-executor")) { @@ -91,12 +109,11 @@ private[nio] class ConnectionManager( logError("Error in handleMessageExecutor is not handled properly", t) } } - } private val handleReadWriteExecutor = new ThreadPoolExecutor( - conf.getInt("spark.core.connection.io.threads.min", 4), - conf.getInt("spark.core.connection.io.threads.max", 32), + ioThreadCount, + ioThreadCount, conf.getInt("spark.core.connection.io.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable](), Utils.namedThreadFactory("handle-read-write-executor")) { @@ -107,14 +124,13 @@ private[nio] class ConnectionManager( logError("Error in handleReadWriteExecutor is not handled properly", t) } } - } // Use a different, yet smaller, thread pool - infrequently used with very short lived tasks : // which should be executed asap private val handleConnectExecutor = new ThreadPoolExecutor( - conf.getInt("spark.core.connection.connect.threads.min", 1), - conf.getInt("spark.core.connection.connect.threads.max", 8), + connectThreadCount, + connectThreadCount, conf.getInt("spark.core.connection.connect.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable](), Utils.namedThreadFactory("handle-connect-executor")) { @@ -125,7 +141,6 @@ private[nio] class ConnectionManager( logError("Error in handleConnectExecutor is not handled properly", t) } } - } private val serverChannel = ServerSocketChannel.open() @@ -136,7 +151,10 @@ private[nio] class ConnectionManager( new HashMap[SelectionKey, Connection] with SynchronizedMap[SelectionKey, Connection] private val connectionsById = new HashMap[ConnectionManagerId, SendingConnection] with SynchronizedMap[ConnectionManagerId, SendingConnection] - private val messageStatuses = new HashMap[Int, MessageStatus] + // Tracks sent messages for which we are awaiting acknowledgements. 
Entries are added to this + // map when messages are sent and are removed when acknowledgement messages are received or when + // acknowledgement timeouts expire + private val messageStatuses = new HashMap[Int, MessageStatus] // [MessageId, MessageStatus] private val keyInterestChangeRequests = new SynchronizedQueue[(SelectionKey, Int)] private val registerRequests = new SynchronizedQueue[SendingConnection] @@ -156,7 +174,7 @@ private[nio] class ConnectionManager( serverChannel.socket.bind(new InetSocketAddress(port)) (serverChannel, serverChannel.socket.getLocalPort) } - Utils.startServiceOnPort[ServerSocketChannel](port, startService, name) + Utils.startServiceOnPort[ServerSocketChannel](port, startService, conf, name) serverChannel.register(selector, SelectionKey.OP_ACCEPT) val id = new ConnectionManagerId(Utils.localHostName, serverChannel.socket.getLocalPort) @@ -166,14 +184,16 @@ private[nio] class ConnectionManager( // to be able to track asynchronous messages private val idCount: AtomicInteger = new AtomicInteger(1) + private val writeRunnableStarted: HashSet[SelectionKey] = new HashSet[SelectionKey]() + private val readRunnableStarted: HashSet[SelectionKey] = new HashSet[SelectionKey]() + private val selectorThread = new Thread("connection-manager-thread") { override def run() = ConnectionManager.this.run() } selectorThread.setDaemon(true) + // start this thread last, since it invokes run(), which accesses members above selectorThread.start() - private val writeRunnableStarted: HashSet[SelectionKey] = new HashSet[SelectionKey]() - private def triggerWrite(key: SelectionKey) { val conn = connectionsByKey.getOrElse(key, null) if (conn == null) return @@ -214,7 +234,6 @@ private[nio] class ConnectionManager( } ) } - private val readRunnableStarted: HashSet[SelectionKey] = new HashSet[SelectionKey]() private def triggerRead(key: SelectionKey) { val conn = connectionsByKey.getOrElse(key, null) @@ -598,7 +617,7 @@ private[nio] class ConnectionManager( } else { var replyToken : Array[Byte] = null try { - replyToken = waitingConn.sparkSaslClient.saslResponse(securityMsg.getToken) + replyToken = waitingConn.sparkSaslClient.response(securityMsg.getToken) if (waitingConn.isSaslComplete()) { logDebug("Client sasl completed after evaluate for id: " + waitingConn.connectionId) connectionsAwaitingSasl -= waitingConn.connectionId @@ -632,7 +651,7 @@ private[nio] class ConnectionManager( connection.synchronized { if (connection.sparkSaslServer == null) { logDebug("Creating sasl Server") - connection.sparkSaslServer = new SparkSaslServer(securityManager) + connection.sparkSaslServer = new SparkSaslServer(conf.getAppId, securityManager) } } replyToken = connection.sparkSaslServer.response(securityMsg.getToken) @@ -776,7 +795,7 @@ private[nio] class ConnectionManager( if (!conn.isSaslComplete()) { conn.synchronized { if (conn.sparkSaslClient == null) { - conn.sparkSaslClient = new SparkSaslClient(securityManager) + conn.sparkSaslClient = new SparkSaslClient(conf.getAppId, securityManager) var firstResponse: Array[Byte] = null try { firstResponse = conn.sparkSaslClient.firstToken() @@ -896,22 +915,41 @@ private[nio] class ConnectionManager( : Future[Message] = { val promise = Promise[Message]() - val timeoutTask = new TimerTask { - override def run(): Unit = { + // It's important that the TimerTask doesn't capture a reference to `message`, which can cause + // memory leaks since cancelled TimerTasks won't necessarily be garbage collected until the time + // at which they would originally be scheduled 
to run. Therefore, extract the message id + // from outside of the TimerTask closure (see SPARK-4393 for more context). + val messageId = message.id + // Keep a weak reference to the promise so that the completed promise may be garbage-collected + val promiseReference = new WeakReference(promise) + val timeoutTask: TimerTask = new TimerTask { + override def run(timeout: Timeout): Unit = { messageStatuses.synchronized { - messageStatuses.remove(message.id).foreach ( s => { + messageStatuses.remove(messageId).foreach { s => val e = new IOException("sendMessageReliably failed because ack " + s"was not received within $ackTimeout sec") - if (!promise.tryFailure(e)) { - logWarning("Ignore error because promise is completed", e) + val p = promiseReference.get + if (p != null) { + // Attempt to fail the promise with a Timeout exception + if (!p.tryFailure(e)) { + // If we reach here, then someone else has already signalled success or failure + // on this promise, so log a warning: + logError("Ignore error because promise is completed", e) + } + } else { + // The WeakReference was empty, which should never happen because + // sendMessageReliably's caller should have a strong reference to promise.future; + logError("Promise was garbage collected; this should never happen!", e) } - }) + } } } } + val timeoutTaskHandle = ackTimeoutMonitor.newTimeout(timeoutTask, ackTimeout, TimeUnit.SECONDS) + val status = new MessageStatus(message, connectionManagerId, s => { - timeoutTask.cancel() + timeoutTaskHandle.cancel() s match { case scala.util.Failure(e) => // Indicates a failure where we either never sent or never got ACK'd @@ -923,7 +961,7 @@ private[nio] class ConnectionManager( val errorMsgByteBuf = ackMessage.asInstanceOf[BufferMessage].buffers.head val errorMsgBytes = new Array[Byte](errorMsgByteBuf.limit()) errorMsgByteBuf.get(errorMsgBytes) - val errorMsg = new String(errorMsgBytes, "utf-8") + val errorMsg = new String(errorMsgBytes, UTF_8) val e = new IOException( s"sendMessageReliably failed with ACK that signalled a remote error: $errorMsg") if (!promise.tryFailure(e)) { @@ -940,7 +978,6 @@ private[nio] class ConnectionManager( messageStatuses += ((message.id, status)) } - ackTimeoutMonitor.schedule(timeoutTask, ackTimeout * 1000) sendMessage(connectionManagerId, message) promise.future } @@ -950,7 +987,7 @@ private[nio] class ConnectionManager( } def stop() { - ackTimeoutMonitor.cancel() + ackTimeoutMonitor.stop() selectorThread.interrupt() selectorThread.join() selector.close() diff --git a/core/src/main/scala/org/apache/spark/network/nio/Message.scala b/core/src/main/scala/org/apache/spark/network/nio/Message.scala index 3ad04591da658..fb4a979b824c3 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/Message.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/Message.scala @@ -22,6 +22,8 @@ import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer +import com.google.common.base.Charsets.UTF_8 + import org.apache.spark.util.Utils private[nio] abstract class Message(val typ: Long, val id: Int) { @@ -92,7 +94,7 @@ private[nio] object Message { */ def createErrorMessage(exception: Exception, ackId: Int): BufferMessage = { val exceptionString = Utils.exceptionString(exception) - val serializedExceptionString = ByteBuffer.wrap(exceptionString.getBytes("utf-8")) + val serializedExceptionString = ByteBuffer.wrap(exceptionString.getBytes(UTF_8)) val errorMessage = createBufferMessage(serializedExceptionString, ackId) errorMessage.hasError = true errorMessage diff --git 
a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala index 5add4fc433fb3..b2aec160635c7 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala @@ -19,12 +19,14 @@ package org.apache.spark.network.nio import java.nio.ByteBuffer -import scala.concurrent.Future - -import org.apache.spark.{SparkException, Logging, SecurityManager, SparkConf} import org.apache.spark.network._ +import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException} + +import scala.concurrent.Future /** @@ -71,20 +73,21 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa /** * Tear down the transfer service. */ - override def stop(): Unit = { + override def close(): Unit = { if (cm != null) { cm.stop() } } override def fetchBlocks( - hostName: String, + host: String, port: Int, - blockIds: Seq[String], + execId: String, + blockIds: Array[String], listener: BlockFetchingListener): Unit = { checkInit() - val cmId = new ConnectionManagerId(hostName, port) + val cmId = new ConnectionManagerId(host, port) val blockMessageArray = new BlockMessageArray(blockIds.map { blockId => BlockMessage.fromGetBlock(GetBlock(BlockId(blockId))) }) @@ -96,21 +99,33 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa val bufferMessage = message.asInstanceOf[BufferMessage] val blockMessageArray = BlockMessageArray.fromBufferMessage(bufferMessage) - for (blockMessage <- blockMessageArray) { - if (blockMessage.getType != BlockMessage.TYPE_GOT_BLOCK) { - listener.onBlockFetchFailure( - new SparkException(s"Unexpected message ${blockMessage.getType} received from $cmId")) - } else { - val blockId = blockMessage.getId - val networkSize = blockMessage.getData.limit() - listener.onBlockFetchSuccess( - blockId.toString, new NioByteBufferManagedBuffer(blockMessage.getData)) + // SPARK-4064: In some cases(eg. Remote block was removed) blockMessageArray may be empty. 
+ if (blockMessageArray.isEmpty) { + blockIds.foreach { id => + listener.onBlockFetchFailure(id, new SparkException(s"Received empty message from $cmId")) + } + } else { + for (blockMessage: BlockMessage <- blockMessageArray) { + val msgType = blockMessage.getType + if (msgType != BlockMessage.TYPE_GOT_BLOCK) { + if (blockMessage.getId != null) { + listener.onBlockFetchFailure(blockMessage.getId.toString, + new SparkException(s"Unexpected message $msgType received from $cmId")) + } + } else { + val blockId = blockMessage.getId + val networkSize = blockMessage.getData.limit() + listener.onBlockFetchSuccess( + blockId.toString, new NioManagedBuffer(blockMessage.getData)) + } } } }(cm.futureExecContext) future.onFailure { case exception => - listener.onBlockFetchFailure(exception) + blockIds.foreach { blockId => + listener.onBlockFetchFailure(blockId, exception) + } }(cm.futureExecContext) } @@ -122,12 +137,13 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa override def uploadBlock( hostname: String, port: Int, - blockId: String, + execId: String, + blockId: BlockId, blockData: ManagedBuffer, level: StorageLevel) : Future[Unit] = { checkInit() - val msg = PutBlock(BlockId(blockId), blockData.nioByteBuffer(), level) + val msg = PutBlock(blockId, blockData.nioByteBuffer(), level) val blockMessageArray = new BlockMessageArray(BlockMessage.fromPutBlock(msg)) val remoteCmId = new ConnectionManagerId(hostName, port) val reply = cm.sendMessageReliably(remoteCmId, blockMessageArray.toBufferMessage) @@ -149,10 +165,9 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa val responseMessages = blockMessages.map(processBlockMessage).filter(_ != None).map(_.get) Some(new BlockMessageArray(responseMessages).toBufferMessage) } catch { - case e: Exception => { + case e: Exception => logError("Exception handling buffer message", e) Some(Message.createErrorMessage(e, msg.id)) - } } case otherMessage: Any => @@ -167,13 +182,13 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa case BlockMessage.TYPE_PUT_BLOCK => val msg = PutBlock(blockMessage.getId, blockMessage.getData, blockMessage.getLevel) logDebug("Received [" + msg + "]") - putBlock(msg.id.toString, msg.data, msg.level) + putBlock(msg.id, msg.data, msg.level) None case BlockMessage.TYPE_GET_BLOCK => val msg = new GetBlock(blockMessage.getId) logDebug("Received [" + msg + "]") - val buffer = getBlock(msg.id.toString) + val buffer = getBlock(msg.id) if (buffer == null) { return None } @@ -183,20 +198,20 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa } } - private def putBlock(blockId: String, bytes: ByteBuffer, level: StorageLevel) { + private def putBlock(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel) { val startTimeMs = System.currentTimeMillis() logDebug("PutBlock " + blockId + " started from " + startTimeMs + " with data: " + bytes) - blockDataManager.putBlockData(blockId, new NioByteBufferManagedBuffer(bytes), level) + blockDataManager.putBlockData(blockId, new NioManagedBuffer(bytes), level) logDebug("PutBlock " + blockId + " used " + Utils.getUsedTimeMs(startTimeMs) + " with data size: " + bytes.limit) } - private def getBlock(blockId: String): ByteBuffer = { + private def getBlock(blockId: BlockId): ByteBuffer = { val startTimeMs = System.currentTimeMillis() logDebug("GetBlock " + blockId + " started from " + startTimeMs) - val buffer = blockDataManager.getBlockData(blockId).orNull + val buffer = 
blockDataManager.getBlockData(blockId) logDebug("GetBlock " + blockId + " used " + Utils.getUsedTimeMs(startTimeMs) + " and got buffer " + buffer) - if (buffer == null) null else buffer.nioByteBuffer() + buffer.nioByteBuffer() } } diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index e2fc9c649925e..b6249b492150a 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -27,8 +27,7 @@ package org.apache * contains operations available only on RDDs of Doubles; and * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that can * be saved as SequenceFiles. These operations are automatically available on any RDD of the right - * type (e.g. RDD[(Int, Int)] through implicit conversions when you - * `import org.apache.spark.SparkContext._`. + * type (e.g. RDD[(Int, Int)] through implicit conversions. * * Java programmers should reference the [[org.apache.spark.api.java]] package * for Spark programming APIs in Java. @@ -44,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.2.0-SNAPSHOT" + val SPARK_VERSION = "1.3.0-SNAPSHOT" } diff --git a/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala index 3155dfe165664..637492a97551b 100644 --- a/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.partial -import cern.jet.stat.Probability +import org.apache.commons.math3.distribution.NormalDistribution /** * An ApproximateEvaluator for counts. @@ -46,7 +46,8 @@ private[spark] class CountEvaluator(totalOutputs: Int, confidence: Double) val mean = (sum + 1 - p) / p val variance = (sum + 1) * (1 - p) / (p * p) val stdev = math.sqrt(variance) - val confFactor = Probability.normalInverse(1 - (1 - confidence) / 2) + val confFactor = new NormalDistribution(). + inverseCumulativeProbability(1 - (1 - confidence) / 2) val low = mean - confFactor * stdev val high = mean + confFactor * stdev new BoundedDouble(mean, confidence, low, high) diff --git a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala index 8bb78123e3c9c..3ef3cc219dec6 100644 --- a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala @@ -24,7 +24,7 @@ import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag -import cern.jet.stat.Probability +import org.apache.commons.math3.distribution.NormalDistribution import org.apache.spark.util.collection.OpenHashMap @@ -55,7 +55,8 @@ private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, conf new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs - val confFactor = Probability.normalInverse(1 - (1 - confidence) / 2) + val confFactor = new NormalDistribution(). 
+ inverseCumulativeProbability(1 - (1 - confidence) / 2) val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => val mean = (sum + 1 - p) / p diff --git a/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala index d24959cba8727..787a21a61fdcf 100644 --- a/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.partial -import cern.jet.stat.Probability +import org.apache.commons.math3.distribution.{NormalDistribution, TDistribution} import org.apache.spark.util.StatCounter @@ -45,9 +45,10 @@ private[spark] class MeanEvaluator(totalOutputs: Int, confidence: Double) val stdev = math.sqrt(counter.sampleVariance / counter.count) val confFactor = { if (counter.count > 100) { - Probability.normalInverse(1 - (1 - confidence) / 2) + new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2) } else { - Probability.studentTInverse(1 - confidence, (counter.count - 1).toInt) + val degreesOfFreedom = (counter.count - 1).toInt + new TDistribution(degreesOfFreedom).inverseCumulativeProbability(1 - (1 - confidence) / 2) } } val low = mean - confFactor * stdev diff --git a/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala index 92915ee66d29f..828bf96c2c0bd 100644 --- a/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala +++ b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala @@ -17,7 +17,7 @@ package org.apache.spark.partial -import cern.jet.stat.Probability +import org.apache.commons.math3.distribution.{TDistribution, NormalDistribution} /** * A utility class for caching Student's T distribution values for a given confidence level @@ -25,8 +25,10 @@ import cern.jet.stat.Probability * confidence intervals for many keys. 
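The evaluators above replace colt's Probability.normalInverse and Probability.studentTInverse with commons-math3 distributions. A minimal standalone sketch of the two replacement calls, for illustration only (the 0.95 confidence level and the sample size of 10 are assumptions, not values from this patch):

import org.apache.commons.math3.distribution.{NormalDistribution, TDistribution}

object ConfidenceFactorSketch {
  def main(args: Array[String]): Unit = {
    val confidence = 0.95

    // Two-sided critical value from the standard normal distribution;
    // for confidence = 0.95 this is roughly 1.96, the same value colt's normalInverse(0.975) gave.
    val normalFactor =
      new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2)

    // For small samples the evaluators fall back to Student's t with (n - 1) degrees of freedom.
    val sampleSize = 10
    val tFactor =
      new TDistribution(sampleSize - 1).inverseCumulativeProbability(1 - (1 - confidence) / 2)

    println(s"normal factor: $normalFactor, t factor (df=${sampleSize - 1}): $tFactor")
  }
}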
*/ private[spark] class StudentTCacher(confidence: Double) { + val NORMAL_APPROX_SAMPLE_SIZE = 100 // For samples bigger than this, use Gaussian approximation - val normalApprox = Probability.normalInverse(1 - (1 - confidence) / 2) + + val normalApprox = new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2) val cache = Array.fill[Double](NORMAL_APPROX_SAMPLE_SIZE)(-1.0) def get(sampleSize: Long): Double = { @@ -35,7 +37,8 @@ private[spark] class StudentTCacher(confidence: Double) { } else { val size = sampleSize.toInt if (cache(size) < 0) { - cache(size) = Probability.studentTInverse(1 - confidence, size - 1) + val tDist = new TDistribution(size - 1) + cache(size) = tDist.inverseCumulativeProbability(1 - (1 - confidence) / 2) } cache(size) } diff --git a/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala index d5336284571d2..1753c2561b678 100644 --- a/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.partial -import cern.jet.stat.Probability +import org.apache.commons.math3.distribution.{TDistribution, NormalDistribution} import org.apache.spark.util.StatCounter @@ -55,9 +55,10 @@ private[spark] class SumEvaluator(totalOutputs: Int, confidence: Double) val sumStdev = math.sqrt(sumVar) val confFactor = { if (counter.count > 100) { - Probability.normalInverse(1 - (1 - confidence) / 2) + new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2) } else { - Probability.studentTInverse(1 - confidence, (counter.count - 1).toInt) + val degreesOfFreedom = (counter.count - 1).toInt + new TDistribution(degreesOfFreedom).inverseCumulativeProbability(1 - (1 - confidence) / 2) } } val low = sumEstimate - confFactor * sumStdev diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index 9f9f10b7ebc3a..646df283ac069 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -27,7 +27,6 @@ import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} /** * A set of asynchronous RDD actions available through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Logging { diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala new file mode 100644 index 0000000000000..1f755db485812 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.hadoop.conf.{ Configurable, Configuration } +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapreduce._ +import org.apache.spark.input.StreamFileInputFormat +import org.apache.spark.{ Partition, SparkContext } + +private[spark] class BinaryFileRDD[T]( + sc: SparkContext, + inputFormatClass: Class[_ <: StreamFileInputFormat[T]], + keyClass: Class[String], + valueClass: Class[T], + @transient conf: Configuration, + minPartitions: Int) + extends NewHadoopRDD[String, T](sc, inputFormatClass, keyClass, valueClass, conf) { + + override def getPartitions: Array[Partition] = { + val inputFormat = inputFormatClass.newInstance + inputFormat match { + case configurable: Configurable => + configurable.setConf(conf) + case _ => + } + val jobContext = newJobContext(conf, jobId) + inputFormat.setMinPartitions(jobContext, minPartitions) + val rawSplits = inputFormat.getSplits(jobContext).toArray + val result = new Array[Partition](rawSplits.size) + for (i <- 0 until rawSplits.size) { + result(i) = new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + } + result + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 2673ec22509e9..fffa1911f5bc2 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -84,5 +84,9 @@ class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds "Attempted to use %s after its blocks have been removed!".format(toString)) } } + + protected def getBlockIdLocations(): Map[BlockId, Seq[String]] = { + locations_ + } } diff --git a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index 4908711d17db7..1cbd684224b7c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -22,6 +22,7 @@ import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ +import org.apache.spark.util.Utils private[spark] class CartesianPartition( @@ -36,7 +37,7 @@ class CartesianPartition( override val index: Int = idx @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index 7ba1182f0ed27..1c13e2c372845 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -95,7 +95,8 @@ private[spark] object CheckpointRDD extends Logging { val finalOutputName = splitIdToFile(ctx.partitionId) val finalOutputPath = new Path(outputDir, finalOutputName) - val 
tempOutputPath = new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptId) + val tempOutputPath = + new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptNumber) if (fs.exists(tempOutputPath)) { throw new IOException("Checkpoint failed: temporary path " + @@ -119,7 +120,7 @@ private[spark] object CheckpointRDD extends Logging { logInfo("Deleting tempOutputPath " + tempOutputPath) fs.delete(tempOutputPath, false) throw new IOException("Checkpoint failed: failed to save output of task: " - + ctx.attemptId + " and final output path does not exist") + + ctx.attemptNumber + " and final output path does not exist") } else { // Some other copy of this task must've finished before us and renamed it logInfo("Final output path " + finalOutputPath + " already exists; not overwriting it") diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index fabb882cdd4b3..07398a6fa62f6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -27,6 +27,7 @@ import org.apache.spark.{InterruptibleIterator, Partition, Partitioner, SparkEnv import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.collection.{ExternalAppendOnlyMap, AppendOnlyMap, CompactBuffer} +import org.apache.spark.util.Utils import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.ShuffleHandle @@ -39,7 +40,7 @@ private[spark] case class NarrowCoGroupSplitDep( ) extends CoGroupSplitDep { @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization split = rdd.partitions(splitIndex) oos.defaultWriteObject() @@ -59,7 +60,7 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep] * A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a * tuple with the list of values for that key. * - * Note: This is an internal API. We recommend users use RDD.coGroup(...) instead of + * Note: This is an internal API. We recommend users use RDD.cogroup(...) instead of * instantiating this directly. * @param rdds parent RDDs. @@ -69,8 +70,8 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep] class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner) extends RDD[(K, Array[Iterable[_]])](rdds.head.context, Nil) { - // For example, `(k, a) cogroup (k, b)` produces k -> Seq(ArrayBuffer as, ArrayBuffer bs). - // Each ArrayBuffer is represented as a CoGroup, and the resulting Seq as a CoGroupCombiner. + // For example, `(k, a) cogroup (k, b)` produces k -> Array(ArrayBuffer as, ArrayBuffer bs). + // Each ArrayBuffer is represented as a CoGroup, and the resulting Array as a CoGroupCombiner. // CoGroupValue is the intermediate state of each value before being merged in compute. 
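The comment above describes the per-key shape that cogroup produces. As a quick illustration of the user-facing RDD.cogroup behaviour that CoGroupedRDD backs (the local master and sample data are assumptions for this sketch; with this patch the pair-RDD implicits are in scope without an extra import):

import org.apache.spark.{SparkConf, SparkContext}

object CogroupSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("cogroup-sketch").setMaster("local[2]"))

    val left  = sc.parallelize(Seq(("k", 1), ("k", 2), ("other", 3)))
    val right = sc.parallelize(Seq(("k", "a"), ("k", "b")))

    // For key "k" this yields (Iterable(1, 2), Iterable("a", "b")); a key present on only
    // one side gets an empty Iterable for the missing side.
    val grouped = left.cogroup(right)
    grouped.collect().foreach { case (key, (vs, ws)) =>
      println(s"$key -> ${vs.toList} / ${ws.toList}")
    }

    sc.stop()
  }
}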
private type CoGroup = CompactBuffer[Any] private type CoGroupValue = (Any, Int) // Int is dependency number @@ -158,8 +159,8 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: for ((it, depNum) <- rddIterators) { map.insertAll(it.map(pair => (pair._1, new CoGroupValue(pair._2, depNum)))) } - context.taskMetrics.memoryBytesSpilled += map.memoryBytesSpilled - context.taskMetrics.diskBytesSpilled += map.diskBytesSpilled + context.taskMetrics.incMemoryBytesSpilled(map.memoryBytesSpilled) + context.taskMetrics.incDiskBytesSpilled(map.diskBytesSpilled) new InterruptibleIterator(context, map.iterator.asInstanceOf[Iterator[(K, Array[Iterable[_]])]]) } diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala index 11ebafbf6d457..b073eba8a1574 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -25,6 +25,7 @@ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark._ +import org.apache.spark.util.Utils /** * Class that captures a coalesced RDD by essentially keeping track of parent partitions @@ -34,15 +35,14 @@ import org.apache.spark._ * @param preferredLocation the preferred location for this partition */ private[spark] case class CoalescedRDDPartition( - index: Int, - @transient rdd: RDD[_], - parentsIndices: Array[Int], - @transient preferredLocation: String = "" - ) extends Partition { + index: Int, + @transient rdd: RDD[_], + parentsIndices: Array[Int], + @transient preferredLocation: Option[String] = None) extends Partition { var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent partition at the time of task serialization parents = parentsIndices.map(rdd.partitions(_)) oos.defaultWriteObject() @@ -54,9 +54,10 @@ private[spark] case class CoalescedRDDPartition( * @return locality of this coalesced partition between 0 and 1 */ def localFraction: Double = { - val loc = parents.count(p => - rdd.context.getPreferredLocs(rdd, p.index).map(tl => tl.host).contains(preferredLocation)) - + val loc = parents.count { p => + val parentPreferredLocations = rdd.context.getPreferredLocs(rdd, p.index).map(_.host) + preferredLocation.exists(parentPreferredLocations.contains) + } if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) } } @@ -72,9 +73,9 @@ private[spark] case class CoalescedRDDPartition( * @param balanceSlack used to trade-off balance and locality. 
1.0 is all locality, 0 is all balance */ private[spark] class CoalescedRDD[T: ClassTag]( - @transient var prev: RDD[T], - maxPartitions: Int, - balanceSlack: Double = 0.10) + @transient var prev: RDD[T], + maxPartitions: Int, + balanceSlack: Double = 0.10) extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { @@ -112,7 +113,7 @@ private[spark] class CoalescedRDD[T: ClassTag]( * @return the machine most preferred by split */ override def getPreferredLocations(partition: Partition): Seq[String] = { - List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) + partition.asInstanceOf[CoalescedRDDPartition].preferredLocation.toSeq } } @@ -146,7 +147,7 @@ private[spark] class CoalescedRDD[T: ClassTag]( * */ -private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { +private class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = @@ -340,8 +341,14 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc } } -private[spark] case class PartitionGroup(prefLoc: String = "") { +private case class PartitionGroup(prefLoc: Option[String] = None) { var arr = mutable.ArrayBuffer[Partition]() - def size = arr.size } + +private object PartitionGroup { + def apply(prefLoc: String): PartitionGroup = { + require(prefLoc != "", "Preferred location must not be empty") + PartitionGroup(Some(prefLoc)) + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index e0494ee39657c..e66f83bb34e30 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -27,7 +27,6 @@ import org.apache.spark.util.StatCounter /** * Extra functions available on RDDs of Doubles through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { /** Add up the elements in this RDD. */ diff --git a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala deleted file mode 100644 index 9e41b3d1e2d4f..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] class FilteredRDD[T: ClassTag]( - prev: RDD[T], - f: T => Boolean) - extends RDD[T](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override val partitioner = prev.partitioner // Since filter cannot change a partition's keys - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).filter(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala deleted file mode 100644 index d8f87d4e3690e..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class FlatMappedRDD[U: ClassTag, T: ClassTag]( - prev: RDD[T], - f: T => TraversableOnce[U]) - extends RDD[U](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).flatMap(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala deleted file mode 100644 index 7c9023f62d3b6..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U]) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[Product2[K, V]].partitions - - override val partitioner = firstParent[Product2[K, V]].partitioner - - override def compute(split: Partition, context: TaskContext) = { - firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) => - f(v).map(x => (k, x)) - } - } -} diff --git a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala deleted file mode 100644 index f6463fa715a71..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] class GlommedRDD[T: ClassTag](prev: RDD[T]) - extends RDD[Array[T]](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - Array(firstParent[T].iterator(split, context).toArray).iterator -} diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 775141775e06c..89adddcf0ac36 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -35,17 +35,18 @@ import org.apache.hadoop.mapred.Reporter import org.apache.hadoop.mapred.JobID import org.apache.hadoop.mapred.TaskAttemptID import org.apache.hadoop.mapred.TaskID +import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.executor.{DataReadMethod, InputMetrics} +import org.apache.spark.executor.DataReadMethod import org.apache.spark.rdd.HadoopRDD.HadoopMapPartitionsWithSplitRDD import org.apache.spark.util.{NextIterator, Utils} import org.apache.spark.scheduler.{HostTaskLocation, HDFSCacheTaskLocation} - +import org.apache.spark.storage.StorageLevel /** * A Spark split class that wraps around a Hadoop InputSplit. 
@@ -132,7 +133,7 @@ class HadoopRDD[K, V]( // used to build JobTracker ID private val createTime = new Date() - private val shouldCloneJobConf = sc.conf.get("spark.hadoop.cloneConf", "false").toBoolean + private val shouldCloneJobConf = sc.conf.getBoolean("spark.hadoop.cloneConf", false) // Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads. protected def getJobConf(): JobConf = { @@ -212,11 +213,26 @@ class HadoopRDD[K, V]( val split = theSplit.asInstanceOf[HadoopPartition] logInfo("Input split: " + split.inputSplit) - var reader: RecordReader[K, V] = null val jobConf = getJobConf() + + val inputMetrics = context.taskMetrics + .getInputMetricsForReadMethod(DataReadMethod.Hadoop) + + // Find a function that will return the FileSystem bytes read by this thread. Do this before + // creating RecordReader, because RecordReader's constructor might read some bytes + val bytesReadCallback = inputMetrics.bytesReadCallback.orElse { + split.inputSplit.value match { + case _: FileSplit | _: CombineFileSplit => + SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() + case _ => None + } + } + inputMetrics.setBytesReadCallback(bytesReadCallback) + + var reader: RecordReader[K, V] = null val inputFormat = getInputFormat(jobConf) HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmm").format(createTime), - context.stageId, theSplit.index, context.attemptId.toInt, jobConf) + context.stageId, theSplit.index, context.attemptNumber, jobConf) reader = inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL) // Register an on-task-completion callback to close the input stream. @@ -224,19 +240,6 @@ class HadoopRDD[K, V]( val key: K = reader.createKey() val value: V = reader.createValue() - // Set the task input metrics. - val inputMetrics = new InputMetrics(DataReadMethod.Hadoop) - try { - /* bytesRead may not exactly equal the bytes read by a task: split boundaries aren't - * always at record boundaries, so tasks may need to read into other splits to complete - * a record. */ - inputMetrics.bytesRead = split.inputSplit.value.getLength() - } catch { - case e: java.io.IOException => - logWarning("Unable to get input size to set InputMetrics for task", e) - } - context.taskMetrics.inputMetrics = Some(inputMetrics) - override def getNext() = { try { finished = !reader.next(key, value) @@ -244,12 +247,26 @@ class HadoopRDD[K, V]( case eof: EOFException => finished = true } + (key, value) } override def close() { try { reader.close() + if (bytesReadCallback.isDefined) { + inputMetrics.updateBytesRead() + } else if (split.inputSplit.value.isInstanceOf[FileSplit] || + split.inputSplit.value.isInstanceOf[CombineFileSplit]) { + // If we can't get the bytes read from the FS stats, fall back to the split size, + // which may be inaccurate. + try { + inputMetrics.addBytesRead(split.inputSplit.value.getLength) + } catch { + case e: java.io.IOException => + logWarning("Unable to get input size to set InputMetrics for task", e) + } + } } catch { case e: Exception => { if (!Utils.inShutdown()) { @@ -292,6 +309,15 @@ class HadoopRDD[K, V]( // Do nothing. Hadoop RDD should not be checkpointed. } + override def persist(storageLevel: StorageLevel): this.type = { + if (storageLevel.deserialized) { + logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" + + " behavior because Hadoop's RecordReader reuses the same Writable object for all records." 
+ + " Use a map transformation to make copies of the records.") + } + super.persist(storageLevel) + } + def getConf: Configuration = getJobConf() } @@ -302,6 +328,9 @@ private[spark] object HadoopRDD extends Logging { */ val CONFIGURATION_INSTANTIATION_LOCK = new Object() + /** Update the input bytes read metric each time this number of records has been read */ + val RECORDS_BETWEEN_BYTES_READ_METRIC_UPDATES = 256 + /** * The three methods below are helpers for accessing the local map, a property of the SparkEnv of * the local process. diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index 0e38f224ac81d..642a12c1edf6c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -21,8 +21,11 @@ import java.sql.{Connection, ResultSet} import scala.reflect.ClassTag -import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} +import org.apache.spark.api.java.JavaSparkContext.fakeClassTag +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.util.NextIterator +import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition { override def index = idx @@ -125,5 +128,82 @@ object JdbcRDD { def resultSetToObjectArray(rs: ResultSet): Array[Object] = { Array.tabulate[Object](rs.getMetaData.getColumnCount)(i => rs.getObject(i + 1)) } -} + trait ConnectionFactory extends Serializable { + @throws[Exception] + def getConnection: Connection + } + + /** + * Create an RDD that executes an SQL query on a JDBC connection and reads results. + * For usage example, see test case JavaAPISuite.testJavaJdbcRDD. + * + * @param connectionFactory a factory that returns an open Connection. + * The RDD takes care of closing the connection. + * @param sql the text of the query. + * The query must contain two ? placeholders for parameters used to partition the results. + * E.g. "select title, author from books where ? <= id and id <= ?" + * @param lowerBound the minimum value of the first placeholder + * @param upperBound the maximum value of the second placeholder + * The lower and upper bounds are inclusive. + * @param numPartitions the number of partitions. + * Given a lowerBound of 1, an upperBound of 20, and a numPartitions of 2, + * the query would be executed twice, once with (1, 10) and once with (11, 20) + * @param mapRow a function from a ResultSet to a single row of the desired result type(s). + * This should only call getInt, getString, etc; the RDD takes care of calling next. + * The default maps a ResultSet to an array of Object. + */ + def create[T]( + sc: JavaSparkContext, + connectionFactory: ConnectionFactory, + sql: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int, + mapRow: JFunction[ResultSet, T]): JavaRDD[T] = { + + val jdbcRDD = new JdbcRDD[T]( + sc.sc, + () => connectionFactory.getConnection, + sql, + lowerBound, + upperBound, + numPartitions, + (resultSet: ResultSet) => mapRow.call(resultSet))(fakeClassTag) + + new JavaRDD[T](jdbcRDD)(fakeClassTag) + } + + /** + * Create an RDD that executes an SQL query on a JDBC connection and reads results. Each row is + * converted into a `Object` array. For usage example, see test case JavaAPISuite.testJavaJdbcRDD. 
+ * + * @param connectionFactory a factory that returns an open Connection. + * The RDD takes care of closing the connection. + * @param sql the text of the query. + * The query must contain two ? placeholders for parameters used to partition the results. + * E.g. "select title, author from books where ? <= id and id <= ?" + * @param lowerBound the minimum value of the first placeholder + * @param upperBound the maximum value of the second placeholder + * The lower and upper bounds are inclusive. + * @param numPartitions the number of partitions. + * Given a lowerBound of 1, an upperBound of 20, and a numPartitions of 2, + * the query would be executed twice, once with (1, 10) and once with (11, 20) + */ + def create( + sc: JavaSparkContext, + connectionFactory: ConnectionFactory, + sql: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): JavaRDD[Array[Object]] = { + + val mapRow = new JFunction[ResultSet, Array[Object]] { + override def call(resultSet: ResultSet): Array[Object] = { + resultSetToObjectArray(resultSet) + } + } + + create(sc, connectionFactory, sql, lowerBound, upperBound, numPartitions, mapRow) + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala deleted file mode 100644 index 8d7c288593665..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class MappedRDD[U: ClassTag, T: ClassTag](prev: RDD[T], f: T => U) - extends RDD[U](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).map(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala deleted file mode 100644 index a60952eee5901..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.rdd - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[Product2[K, U]].partitions - - override val partitioner = firstParent[Product2[K, U]].partitioner - - override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { - firstParent[Product2[K, V]].iterator(split, context).map { pair => (pair._1, f(pair._2)) } - } -} diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 0cccdefc5ee09..44b9ffd2a53fd 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -25,17 +25,17 @@ import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.input.WholeTextFileInputFormat -import org.apache.spark.InterruptibleIterator -import org.apache.spark.Logging -import org.apache.spark.Partition -import org.apache.spark.SerializableWritable -import org.apache.spark.{SparkContext, TaskContext} -import org.apache.spark.executor.{DataReadMethod, InputMetrics} +import org.apache.spark._ +import org.apache.spark.executor.DataReadMethod +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD import org.apache.spark.util.Utils +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.storage.StorageLevel private[spark] class NewHadoopPartition( rddId: Int, @@ -105,6 +105,21 @@ class NewHadoopRDD[K, V]( val split = theSplit.asInstanceOf[NewHadoopPartition] logInfo("Input split: " + split.serializableHadoopSplit) val conf = confBroadcast.value.value + + val inputMetrics = context.taskMetrics + .getInputMetricsForReadMethod(DataReadMethod.Hadoop) + + // Find a function that will return the FileSystem bytes read by this thread. 
Do this before + // creating RecordReader, because RecordReader's constructor might read some bytes + val bytesReadCallback = inputMetrics.bytesReadCallback.orElse { + split.serializableHadoopSplit.value match { + case _: FileSplit | _: CombineFileSplit => + SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() + case _ => None + } + } + inputMetrics.setBytesReadCallback(bytesReadCallback) + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) val format = inputFormatClass.newInstance @@ -117,22 +132,11 @@ class NewHadoopRDD[K, V]( split.serializableHadoopSplit.value, hadoopAttemptContext) reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) - val inputMetrics = new InputMetrics(DataReadMethod.Hadoop) - try { - /* bytesRead may not exactly equal the bytes read by a task: split boundaries aren't - * always at record boundaries, so tasks may need to read into other splits to complete - * a record. */ - inputMetrics.bytesRead = split.serializableHadoopSplit.value.getLength() - } catch { - case e: Exception => - logWarning("Unable to get input split size in order to set task input bytes", e) - } - context.taskMetrics.inputMetrics = Some(inputMetrics) - // Register an on-task-completion callback to close the input stream. context.addTaskCompletionListener(context => close()) var havePair = false var finished = false + var recordsSinceMetricsUpdate = 0 override def hasNext: Boolean = { if (!finished && !havePair) { @@ -147,12 +151,26 @@ class NewHadoopRDD[K, V]( throw new java.util.NoSuchElementException("End of stream") } havePair = false + (reader.getCurrentKey, reader.getCurrentValue) } private def close() { try { reader.close() + if (bytesReadCallback.isDefined) { + inputMetrics.updateBytesRead() + } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] || + split.serializableHadoopSplit.value.isInstanceOf[CombineFileSplit]) { + // If we can't get the bytes read from the FS stats, fall back to the split size, + // which may be inaccurate. + try { + inputMetrics.addBytesRead(split.serializableHadoopSplit.value.getLength) + } catch { + case e: java.io.IOException => + logWarning("Unable to get input size to set InputMetrics for task", e) + } + } } catch { case e: Exception => { if (!Utils.inShutdown()) { @@ -190,6 +208,16 @@ class NewHadoopRDD[K, V]( locs.getOrElse(split.getLocations.filter(_ != "localhost")) } + override def persist(storageLevel: StorageLevel): this.type = { + if (storageLevel.deserialized) { + logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" + + " behavior because Hadoop's RecordReader reuses the same Writable object for all records." 
+ + " Use a map transformation to make copies of the records.") + } + super.persist(storageLevel) + } + + def getConf: Configuration = confBroadcast.value.value } @@ -233,7 +261,7 @@ private[spark] class WholeTextFileRDD( case _ => } val jobContext = newJobContext(conf, jobId) - inputFormat.setMaxSplitSize(jobContext, minPartitions) + inputFormat.setMinPartitions(jobContext, minPartitions) val rawSplits = inputFormat.getSplits(jobContext).toArray val result = new Array[Partition](rawSplits.size) for (i <- 0 until rawSplits.size) { diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index d0dbfef35d03c..144f679a59460 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -24,10 +24,9 @@ import org.apache.spark.annotation.DeveloperApi /** * Extra functions available on RDDs of (key, value) pairs where the key is sortable through - * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to - * use these functions. They will work with any key type `K` that has an implicit `Ordering[K]` in - * scope. Ordering objects already exist for all of the standard primitive types. Users can also - * define their own orderings for custom types, or to override the default ordering. The implicit + * an implicit conversion. They will work with any key type `K` that has an implicit `Ordering[K]` + * in scope. Ordering objects already exist for all of the standard primitive types. Users can also + * define their own orderings for custom types, or to override the default ordering. The implicit * ordering that is in the closest scope will be used. 
* * {{{ diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index da89f634abaea..49b88a90ab5af 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -25,21 +25,23 @@ import scala.collection.{Map, mutable} import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag +import scala.util.DynamicVariable import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus import org.apache.hadoop.conf.{Configurable, Configuration} -import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewOutputFormat, -RecordWriter => NewRecordWriter, SparkHadoopMapReduceUtil} +RecordWriter => NewRecordWriter} import org.apache.spark._ import org.apache.spark.Partitioner.defaultPartitioner -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.executor.{DataWriteMethod, OutputMetrics} +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -48,7 +50,6 @@ import org.apache.spark.util.random.StratifiedSamplingUtils /** * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. 
*/ class PairRDDFunctions[K, V](self: RDD[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) @@ -84,7 +85,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) throw new SparkException("Default partitioner cannot partition array keys.") } } - val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners) + val aggregator = new Aggregator[K, V, C]( + self.context.clean(createCombiner), + self.context.clean(mergeValue), + self.context.clean(mergeCombiners)) if (self.partitioner == Some(partitioner)) { self.mapPartitions(iter => { val context = TaskContext.get() @@ -120,11 +124,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def aggregateByKey[U: ClassTag](zeroValue: U, partitioner: Partitioner)(seqOp: (U, V) => U, combOp: (U, U) => U): RDD[(K, U)] = { // Serialize the zero value to a byte array so that we can get a new clone of it on each key - val zeroBuffer = SparkEnv.get.closureSerializer.newInstance().serialize(zeroValue) + val zeroBuffer = SparkEnv.get.serializer.newInstance().serialize(zeroValue) val zeroArray = new Array[Byte](zeroBuffer.limit) zeroBuffer.get(zeroArray) - lazy val cachedSerializer = SparkEnv.get.closureSerializer.newInstance() + lazy val cachedSerializer = SparkEnv.get.serializer.newInstance() val createZero = () => cachedSerializer.deserialize[U](ByteBuffer.wrap(zeroArray)) combineByKey[U]((v: V) => seqOp(createZero(), v), seqOp, combOp, partitioner) @@ -165,12 +169,12 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def foldByKey(zeroValue: V, partitioner: Partitioner)(func: (V, V) => V): RDD[(K, V)] = { // Serialize the zero value to a byte array so that we can get a new clone of it on each key - val zeroBuffer = SparkEnv.get.closureSerializer.newInstance().serialize(zeroValue) + val zeroBuffer = SparkEnv.get.serializer.newInstance().serialize(zeroValue) val zeroArray = new Array[Byte](zeroBuffer.limit) zeroBuffer.get(zeroArray) // When deserializing, use a lazy val to create just one instance of the serializer per task - lazy val cachedSerializer = SparkEnv.get.closureSerializer.newInstance() + lazy val cachedSerializer = SparkEnv.get.serializer.newInstance() val createZero = () => cachedSerializer.deserialize[V](ByteBuffer.wrap(zeroArray)) combineByKey[V]((v: V) => func(createZero(), v), func, func, partitioner) @@ -433,6 +437,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Note: This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. + * + * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any + * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. */ def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = { // groupByKey shouldn't use map side combine because map side combine does not @@ -454,6 +461,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Note: This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. + * + * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any + * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. 
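The note above is the motivation for preferring aggregateByKey or reduceByKey when the goal is an aggregation. A small sketch of a per-key average that never materializes all of a key's values at once (the sample data and local master are assumptions):

import org.apache.spark.{SparkConf, SparkContext}

object PerKeyAverageSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("per-key-average").setMaster("local[2]"))
    val pairs = sc.parallelize(Seq(("a", 1.0), ("a", 3.0), ("b", 2.0)))

    // Fold each key into a (sum, count) pair instead of collecting every value with groupByKey.
    val sumAndCount = pairs.aggregateByKey((0.0, 0L))(
      (acc, v) => (acc._1 + v, acc._2 + 1),
      (x, y) => (x._1 + y._1, x._2 + y._2))

    val averages = sumAndCount.mapValues { case (sum, count) => sum / count }
    averages.collect().foreach(println)

    sc.stop()
  }
}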
*/ def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = { groupByKey(new HashPartitioner(numPartitions)) @@ -480,7 +490,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = { this.cogroup(other, partitioner).flatMapValues( pair => - for (v <- pair._1; w <- pair._2) yield (v, w) + for (v <- pair._1.iterator; w <- pair._2.iterator) yield (v, w) ) } @@ -493,9 +503,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = { this.cogroup(other, partitioner).flatMapValues { pair => if (pair._2.isEmpty) { - pair._1.map(v => (v, None)) + pair._1.iterator.map(v => (v, None)) } else { - for (v <- pair._1; w <- pair._2) yield (v, Some(w)) + for (v <- pair._1.iterator; w <- pair._2.iterator) yield (v, Some(w)) } } } @@ -510,9 +520,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) : RDD[(K, (Option[V], W))] = { this.cogroup(other, partitioner).flatMapValues { pair => if (pair._1.isEmpty) { - pair._2.map(w => (None, w)) + pair._2.iterator.map(w => (None, w)) } else { - for (v <- pair._1; w <- pair._2) yield (Some(v), w) + for (v <- pair._1.iterator; w <- pair._2.iterator) yield (Some(v), w) } } } @@ -528,9 +538,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def fullOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) : RDD[(K, (Option[V], Option[W]))] = { this.cogroup(other, partitioner).flatMapValues { - case (vs, Seq()) => vs.map(v => (Some(v), None)) - case (Seq(), ws) => ws.map(w => (None, Some(w))) - case (vs, ws) => for (v <- vs; w <- ws) yield (Some(v), Some(w)) + case (vs, Seq()) => vs.iterator.map(v => (Some(v), None)) + case (Seq(), ws) => ws.iterator.map(w => (None, Some(w))) + case (vs, ws) => for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), Some(w)) } } @@ -660,7 +670,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def mapValues[U](f: V => U): RDD[(K, U)] = { val cleanF = self.context.clean(f) - new MappedValuesRDD(self, cleanF) + new MapPartitionsRDD[(K, U), (K, V)](self, + (context, pid, iter) => iter.map { case (k, v) => (k, cleanF(v)) }, + preservesPartitioning = true) } /** @@ -669,7 +681,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def flatMapValues[U](f: V => TraversableOnce[U]): RDD[(K, U)] = { val cleanF = self.context.clean(f) - new FlatMappedValuesRDD(self, cleanF) + new MapPartitionsRDD[(K, U), (K, V)](self, + (context, pid, iter) => iter.flatMap { case (k, v) => + cleanF(v).map(x => (k, x)) + }, + preservesPartitioning = true) } /** @@ -955,36 +971,43 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val outfmt = job.getOutputFormatClass val jobFormat = outfmt.newInstance - if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true)) { + if (isOutputSpecValidationEnabled) { // FileOutputFormat ignores the filesystem parameter jobFormat.checkOutputSpecs(job) } val writeShard = (context: TaskContext, iter: Iterator[(K,V)]) => { - // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it - // around by taking a mod. We expect that no task will be attempted 2 billion times. 
- val attemptNumber = (context.attemptId % Int.MaxValue).toInt + val config = wrappedConf.value /* "reduce task" */ val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId, - attemptNumber) - val hadoopContext = newTaskAttemptContext(wrappedConf.value, attemptId) + context.attemptNumber) + val hadoopContext = newTaskAttemptContext(config, attemptId) val format = outfmt.newInstance format match { - case c: Configurable => c.setConf(wrappedConf.value) + case c: Configurable => c.setConf(config) case _ => () } val committer = format.getOutputCommitter(hadoopContext) committer.setupTask(hadoopContext) + + val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context) + val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K,V]] try { + var recordsWritten = 0L while (iter.hasNext) { val pair = iter.next() writer.write(pair._1, pair._2) + + // Update bytes written metric every few records + maybeUpdateOutputMetrics(bytesWrittenCallback, outputMetrics, recordsWritten) + recordsWritten += 1 } } finally { writer.close(hadoopContext) } committer.commitTask(hadoopContext) + bytesWrittenCallback.foreach { fn => outputMetrics.setBytesWritten(fn()) } 1 } : Int @@ -1005,6 +1028,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def saveAsHadoopDataset(conf: JobConf) { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). val hadoopConf = conf + val wrappedConf = new SerializableWritable(hadoopConf) val outputFormatInstance = hadoopConf.getOutputFormat val keyClass = hadoopConf.getOutputKeyClass val valueClass = hadoopConf.getOutputValueClass @@ -1022,7 +1046,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) logDebug("Saving as hadoop file of type (" + keyClass.getSimpleName + ", " + valueClass.getSimpleName + ")") - if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true)) { + if (isOutputSpecValidationEnabled) { // FileOutputFormat ignores the filesystem parameter val ignoredFs = FileSystem.get(hadoopConf) hadoopConf.getOutputFormat.checkOutputSpecs(ignoredFs, hadoopConf) @@ -1032,27 +1056,53 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) writer.preSetup() val writeToFile = (context: TaskContext, iter: Iterator[(K, V)]) => { + val config = wrappedConf.value // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. 
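The writeShard closure above only refreshes the bytes-written metric every few records via maybeUpdateOutputMetrics, since polling filesystem statistics on every record would be wasteful. A rough sketch of that throttling pattern in isolation (writeAll, bytesSoFar and report are made-up names, not Spark APIs):

    val RECORDS_BETWEEN_UPDATES = 256L   // mirrors RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES

    def writeAll[T](records: Iterator[T], write: T => Unit,
        bytesSoFar: () => Long, report: Long => Unit): Unit = {
      var recordsWritten = 0L
      while (records.hasNext) {
        write(records.next())
        // Only poll the (potentially expensive) byte counter every N records.
        if (recordsWritten % RECORDS_BETWEEN_UPDATES == 0) {
          report(bytesSoFar())
        }
        recordsWritten += 1
      }
      report(bytesSoFar())   // final, exact value once the iterator is exhausted
    }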
- val attemptNumber = (context.attemptId % Int.MaxValue).toInt + val taskAttemptId = (context.taskAttemptId % Int.MaxValue).toInt + + val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context) - writer.setup(context.stageId, context.partitionId, attemptNumber) + writer.setup(context.stageId, context.partitionId, taskAttemptId) writer.open() try { + var recordsWritten = 0L while (iter.hasNext) { val record = iter.next() writer.write(record._1.asInstanceOf[AnyRef], record._2.asInstanceOf[AnyRef]) + + // Update bytes written metric every few records + maybeUpdateOutputMetrics(bytesWrittenCallback, outputMetrics, recordsWritten) + recordsWritten += 1 } } finally { writer.close() } writer.commit() + bytesWrittenCallback.foreach { fn => outputMetrics.setBytesWritten(fn()) } } self.context.runJob(self, writeToFile) writer.commitJob() } + private def initHadoopOutputMetrics(context: TaskContext): (OutputMetrics, Option[() => Long]) = { + val bytesWrittenCallback = SparkHadoopUtil.get.getFSBytesWrittenOnThreadCallback() + val outputMetrics = new OutputMetrics(DataWriteMethod.Hadoop) + if (bytesWrittenCallback.isDefined) { + context.taskMetrics.outputMetrics = Some(outputMetrics) + } + (outputMetrics, bytesWrittenCallback) + } + + private def maybeUpdateOutputMetrics(bytesWrittenCallback: Option[() => Long], + outputMetrics: OutputMetrics, recordsWritten: Long): Unit = { + if (recordsWritten % PairRDDFunctions.RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES == 0 + && bytesWrittenCallback.isDefined) { + bytesWrittenCallback.foreach { fn => outputMetrics.setBytesWritten(fn()) } + } + } + /** * Return an RDD with the keys of each tuple. */ @@ -1068,4 +1118,22 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) private[spark] def valueClass: Class[_] = vt.runtimeClass private[spark] def keyOrdering: Option[Ordering[K]] = Option(ord) + + // Note: this needs to be a function instead of a 'val' so that the disableOutputSpecValidation + // setting can take effect: + private def isOutputSpecValidationEnabled: Boolean = { + val validationDisabled = PairRDDFunctions.disableOutputSpecValidation.value + val enabledInConf = self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true) + enabledInConf && !validationDisabled + } +} + +private[spark] object PairRDDFunctions { + val RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES = 256 + + /** + * Allows for the `spark.hadoop.validateOutputSpecs` checks to be disabled on a case-by-case + * basis; see SPARK-4835 for more details. 
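disableOutputSpecValidation, referenced by isOutputSpecValidationEnabled above, is a scala.util.DynamicVariable, which gives a scoped, per-thread override rather than a global flag. A minimal sketch of how that mechanism behaves (skipValidation and save are illustrative names):

    import scala.util.DynamicVariable

    val skipValidation = new DynamicVariable[Boolean](false)

    def save(): Unit = {
      val validate = !skipValidation.value
      println(s"validate output specs: $validate")
    }

    save()                               // validate output specs: true
    skipValidation.withValue(true) {
      save()                             // false, but only inside this block (and this thread)
    }
    save()                               // back to true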
+ */ + val disableOutputSpecValidation: DynamicVariable[Boolean] = new DynamicVariable[Boolean](false) } diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 66c71bf7e8bb5..f12d0cffaba34 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -48,7 +48,7 @@ private[spark] class ParallelCollectionPartition[T: ClassTag]( override def index: Int = slice @throws(classOf[IOException]) - private def writeObject(out: ObjectOutputStream): Unit = { + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { val sfactory = SparkEnv.get.serializer @@ -67,7 +67,7 @@ private[spark] class ParallelCollectionPartition[T: ClassTag]( } @throws(classOf[IOException]) - private def readObject(in: ObjectInputStream): Unit = { + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val sfactory = SparkEnv.get.serializer sfactory match { @@ -111,7 +111,8 @@ private object ParallelCollectionRDD { /** * Slice a collection into numSlices sub-collections. One extra thing we do here is to treat Range * collections specially, encoding the slices as other Ranges to minimize memory cost. This makes - * it efficient to run Spark over RDDs representing large sets of numbers. + * it efficient to run Spark over RDDs representing large sets of numbers. And if the collection + * is an inclusive Range, we use inclusive range for the last slice. */ def slice[T: ClassTag](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = { if (numSlices < 1) { @@ -127,19 +128,15 @@ private object ParallelCollectionRDD { }) } seq match { - case r: Range.Inclusive => { - val sign = if (r.step < 0) { - -1 - } else { - 1 - } - slice(new Range( - r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices) - } case r: Range => { - positions(r.length, numSlices).map({ - case (start, end) => + positions(r.length, numSlices).zipWithIndex.map({ case ((start, end), index) => + // If the range is inclusive, use inclusive range for the last slice + if (r.isInclusive && index == numSlices - 1) { + new Range.Inclusive(r.start + start * r.step, r.end, r.step) + } + else { new Range(r.start + start * r.step, r.start + end * r.step, r.step) + } }).toSeq.asInstanceOf[Seq[Seq[T]]] } case nr: NumericRange[_] => { diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala index 0c2cd7a24783b..92b0641d0fb6e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala @@ -22,6 +22,7 @@ import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} +import org.apache.spark.util.Utils /** * Class representing partitions of PartitionerAwareUnionRDD, which maintains the list of @@ -38,7 +39,7 @@ class PartitionerAwareUnionRDDPartition( override def hashCode(): Int = idx @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent partition at the time of task serialization parents = rdds.map(_.partitions(index)).toArray oos.defaultWriteObject() diff --git 
a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 56ac7a69be0d3..ed79032893d33 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -63,7 +63,7 @@ private[spark] class PipedRDD[T: ClassTag]( /** * A FilenameFilter that accepts anything that isn't equal to the name passed in. - * @param name of file or directory to leave out + * @param filterName of file or directory to leave out */ class NotEqualsFileNameFilter(filterName: String) extends FilenameFilter { def accept(dir: File, name: String): Boolean = { diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index b7f125d01dfaf..fe55a5124f3b6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -17,33 +17,31 @@ package org.apache.spark.rdd -import java.util.{Properties, Random} +import java.util.Random import scala.collection.{mutable, Map} import scala.collection.mutable.ArrayBuffer +import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus -import org.apache.hadoop.io.BytesWritable +import org.apache.hadoop.io.{Writable, BytesWritable, NullWritable, Text} import org.apache.hadoop.io.compress.CompressionCodec -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.TextOutputFormat import org.apache.spark._ import org.apache.spark.Partitioner._ -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD -import org.apache.spark.broadcast.Broadcast import org.apache.spark.partial.BoundedDouble import org.apache.spark.partial.CountEvaluator import org.apache.spark.partial.GroupedCountEvaluator import org.apache.spark.partial.PartialResult import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.{BoundedPriorityQueue, Utils, CallSite} +import org.apache.spark.util.{BoundedPriorityQueue, Utils} import org.apache.spark.util.collection.OpenHashMap -import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, SamplingUtils} +import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, BernoulliCellSampler, + SamplingUtils} /** * A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. Represents an immutable, @@ -55,8 +53,8 @@ import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, SamplingU * Doubles; and * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that * can be saved as SequenceFiles. - * These operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] - * through implicit conversions when you `import org.apache.spark.SparkContext._`. + * All operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] + * through implicit. * * Internally, each RDD is characterized by five main properties: * @@ -74,10 +72,27 @@ import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, SamplingU * on RDD internals. 
*/ abstract class RDD[T: ClassTag]( - @transient private var sc: SparkContext, + @transient private var _sc: SparkContext, @transient private var deps: Seq[Dependency[_]] ) extends Serializable with Logging { + if (classOf[RDD[_]].isAssignableFrom(elementClassTag.runtimeClass)) { + // This is a warning instead of an exception in order to avoid breaking user programs that + // might have defined nested RDDs without running jobs with them. + logWarning("Spark does not support nested RDDs (see SPARK-5063)") + } + + private def sc: SparkContext = { + if (_sc == null) { + throw new SparkException( + "RDD transformations and actions can only be invoked by the driver, not inside of other " + + "transformations; for example, rdd1.map(x => rdd2.values.count() * x) is invalid because " + + "the values transformation and count action cannot be performed inside of the rdd1.map " + + "transformation. For more information, see SPARK-5063.") + } + _sc + } + /** Construct an RDD with just a one-to-one dependency on one parent */ def this(@transient oneParent: RDD[_]) = this(oneParent.context , List(new OneToOneDependency(oneParent))) @@ -267,19 +282,30 @@ abstract class RDD[T: ClassTag]( /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[U: ClassTag](f: T => U): RDD[U] = new MappedRDD(this, sc.clean(f)) + def map[U: ClassTag](f: T => U): RDD[U] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.map(cleanF)) + } /** * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = - new FlatMappedRDD(this, sc.clean(f)) + def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.flatMap(cleanF)) + } /** * Return a new RDD containing only the elements that satisfy a predicate. */ - def filter(f: T => Boolean): RDD[T] = new FilteredRDD(this, sc.clean(f)) + def filter(f: T => Boolean): RDD[T] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[T, T]( + this, + (context, pid, iter) => iter.filter(cleanF), + preservesPartitioning = true) + } /** * Return a new RDD containing the distinct elements in this RDD. @@ -375,7 +401,8 @@ abstract class RDD[T: ClassTag]( val sum = weights.sum val normalizedCumWeights = weights.map(_ / sum).scanLeft(0.0d)(_ + _) normalizedCumWeights.sliding(2).map { x => - new PartitionwiseSampledRDD[T, T](this, new BernoulliSampler[T](x(0), x(1)), true, seed) + new PartitionwiseSampledRDD[T, T]( + this, new BernoulliCellSampler[T](x(0), x(1)), true, seed) }.toArray } @@ -499,7 +526,9 @@ abstract class RDD[T: ClassTag]( /** * Return an RDD created by coalescing all elements within each partition into an array. */ - def glom(): RDD[Array[T]] = new GlommedRDD(this) + def glom(): RDD[Array[T]] = { + new MapPartitionsRDD[Array[T], T](this, (context, pid, iter) => Iterator(iter.toArray)) + } /** * Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of @@ -571,8 +600,8 @@ abstract class RDD[T: ClassTag]( * print line function (like out.println()) as the 2nd parameter. 
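The new private `sc` accessor above exists to produce the SPARK-5063 error message quoted there. A short illustration of the invalid pattern and a working alternative (assumes an existing SparkContext named sc):

    val rdd1 = sc.parallelize(1 to 3)
    val rdd2 = sc.parallelize(Seq(("a", 1), ("b", 2)))

    // Invalid: rdd2 would have to be used inside a task running on an executor,
    // which is exactly what the guarded `sc` accessor now reports as SPARK-5063.
    // rdd1.map(x => rdd2.values.count() * x)   // fails at runtime

    // Working alternative: compute the small result on the driver first.
    val factor = rdd2.values.count()            // 2
    val scaled = rdd1.map(x => factor * x)      // Array(2, 4, 6) when collected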
* An example of pipe the RDD data of groupBy() in a streaming way, * instead of constructing a huge String to concat all the elements: - * def printRDDElement(record:(String, Seq[String]), f:String=>Unit) = - * for (e <- record._2){f(e)} + * def printRDDElement(record:(String, Seq[String]), f:String=>Unit) = + * for (e <- record._2){f(e)} * @param separateWorkingDir Use separate working directories for each task. * @return the result RDD */ @@ -808,7 +837,7 @@ abstract class RDD[T: ClassTag]( * Return an RDD with the elements from `this` that are not in `other`. * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting - * RDD will be <= us. + * RDD will be <= us. */ def subtract(other: RDD[T]): RDD[T] = subtract(other, partitioner.getOrElse(new HashPartitioner(partitions.size))) @@ -867,6 +896,38 @@ abstract class RDD[T: ClassTag]( jobResult.getOrElse(throw new UnsupportedOperationException("empty collection")) } + /** + * Reduces the elements of this RDD in a multi-level tree pattern. + * + * @param depth suggested depth of the tree (default: 2) + * @see [[org.apache.spark.rdd.RDD#reduce]] + */ + def treeReduce(f: (T, T) => T, depth: Int = 2): T = { + require(depth >= 1, s"Depth must be greater than or equal to 1 but got $depth.") + val cleanF = context.clean(f) + val reducePartition: Iterator[T] => Option[T] = iter => { + if (iter.hasNext) { + Some(iter.reduceLeft(cleanF)) + } else { + None + } + } + val partiallyReduced = mapPartitions(it => Iterator(reducePartition(it))) + val op: (Option[T], Option[T]) => Option[T] = (c, x) => { + if (c.isDefined && x.isDefined) { + Some(cleanF(c.get, x.get)) + } else if (c.isDefined) { + c + } else if (x.isDefined) { + x + } else { + None + } + } + partiallyReduced.treeAggregate(Option.empty[T])(op, op, depth) + .getOrElse(throw new UnsupportedOperationException("empty collection")) + } + /** * Aggregate the elements of each partition, and then the results for all the partitions, using a * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to @@ -902,6 +963,37 @@ abstract class RDD[T: ClassTag]( jobResult } + /** + * Aggregates the elements of this RDD in a multi-level tree pattern. + * + * @param depth suggested depth of the tree (default: 2) + * @see [[org.apache.spark.rdd.RDD#aggregate]] + */ + def treeAggregate[U: ClassTag](zeroValue: U)( + seqOp: (U, T) => U, + combOp: (U, U) => U, + depth: Int = 2): U = { + require(depth >= 1, s"Depth must be greater than or equal to 1 but got $depth.") + if (partitions.size == 0) { + return Utils.clone(zeroValue, context.env.closureSerializer.newInstance()) + } + val cleanSeqOp = context.clean(seqOp) + val cleanCombOp = context.clean(combOp) + val aggregatePartition = (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp) + var partiallyAggregated = mapPartitions(it => Iterator(aggregatePartition(it))) + var numPartitions = partiallyAggregated.partitions.size + val scale = math.max(math.ceil(math.pow(numPartitions, 1.0 / depth)).toInt, 2) + // If creating an extra level doesn't help reduce the wall-clock time, we stop tree aggregation. 
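Usage sketch for the treeReduce and treeAggregate methods added above; with many partitions the partial results are merged in a few extra rounds instead of all at once on the driver (assumes an existing SparkContext named sc):

    val data = sc.parallelize(1 to 1000, numSlices = 100)

    val treeSum = data.treeReduce(_ + _, depth = 3)            // 500500
    val (sum, count) = data.treeAggregate((0L, 0L))(
      (acc, x) => (acc._1 + x, acc._2 + 1),                    // seqOp within a partition
      (l, r) => (l._1 + r._1, l._2 + r._2),                    // combOp between partitions
      depth = 2)
    // sum == 500500, count == 1000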
+ while (numPartitions > scale + numPartitions / scale) { + numPartitions /= scale + val curNumPartitions = numPartitions + partiallyAggregated = partiallyAggregated.mapPartitionsWithIndex { (i, iter) => + iter.map((i % curNumPartitions, _)) + }.reduceByKey(new HashPartitioner(curNumPartitions), cleanCombOp).values + } + partiallyAggregated.reduce(cleanCombOp) + } + /** * Return the number of elements in the RDD. */ @@ -931,7 +1023,7 @@ abstract class RDD[T: ClassTag]( * * Note that this method should only be used if the resulting map is expected to be small, as * the whole thing is loaded into the driver's memory. - * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which + * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which * returns an RDD[T, Long] instead of a map. */ def countByValue()(implicit ord: Ordering[T] = null): Map[T, Long] = { @@ -969,7 +1061,7 @@ abstract class RDD[T: ClassTag]( * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available * here. * - * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp > p` + * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp > p` * would trigger sparse representation of registers, which may reduce the memory consumption * and increase accuracy when the cardinality is small. * @@ -1094,7 +1186,7 @@ abstract class RDD[T: ClassTag]( } /** - * Returns the top K (largest) elements from this RDD as defined by the specified + * Returns the top k (largest) elements from this RDD as defined by the specified * implicit Ordering[T]. This does the opposite of [[takeOrdered]]. For example: * {{{ * sc.parallelize(Seq(10, 4, 2, 12, 3)).top(1) @@ -1104,14 +1196,14 @@ abstract class RDD[T: ClassTag]( * // returns Array(6, 5) * }}} * - * @param num the number of top elements to return + * @param num k, the number of top elements to return * @param ord the implicit ordering for T * @return an array of top elements */ def top(num: Int)(implicit ord: Ordering[T]): Array[T] = takeOrdered(num)(ord.reverse) /** - * Returns the first K (smallest) elements from this RDD as defined by the specified + * Returns the first k (smallest) elements from this RDD as defined by the specified * implicit Ordering[T] and maintains the ordering. This does the opposite of [[top]]. * For example: * {{{ @@ -1122,7 +1214,7 @@ abstract class RDD[T: ClassTag]( * // returns Array(2, 3) * }}} * - * @param num the number of top elements to return + * @param num k, the number of elements to return * @param ord the implicit ordering for T * @return an array of top elements */ @@ -1130,15 +1222,20 @@ abstract class RDD[T: ClassTag]( if (num == 0) { Array.empty } else { - mapPartitions { items => + val mapRDDs = mapPartitions { items => // Priority keeps the largest elements, so let's reverse the ordering. val queue = new BoundedPriorityQueue[T](num)(ord.reverse) queue ++= util.collection.Utils.takeOrdered(items, num)(ord) Iterator.single(queue) - }.reduce { (queue1, queue2) => - queue1 ++= queue2 - queue1 - }.toArray.sorted(ord) + } + if (mapRDDs.partitions.size == 0) { + Array.empty + } else { + mapRDDs.reduce { (queue1, queue2) => + queue1 ++= queue2 + queue1 + }.toArray.sorted(ord) + } } } @@ -1154,11 +1251,36 @@ abstract class RDD[T: ClassTag]( * */ def min()(implicit ord: Ordering[T]): T = this.reduce(ord.min) + /** + * @return true if and only if the RDD contains no elements at all. 
Note that an RDD + * may be empty even when it has at least 1 partition. + */ + def isEmpty(): Boolean = partitions.length == 0 || take(1).length == 0 + /** * Save this RDD as a text file, using string representations of elements. */ def saveAsTextFile(path: String) { - this.map(x => (NullWritable.get(), new Text(x.toString))) + // https://issues.apache.org/jira/browse/SPARK-2075 + // + // NullWritable is a `Comparable` in Hadoop 1.+, so the compiler cannot find an implicit + // Ordering for it and will use the default `null`. However, it's a `Comparable[NullWritable]` + // in Hadoop 2.+, so the compiler will call the implicit `Ordering.ordered` method to create an + // Ordering for `NullWritable`. That's why the compiler will generate different anonymous + // classes for `saveAsTextFile` in Hadoop 1.+ and Hadoop 2.+. + // + // Therefore, here we provide an explicit Ordering `null` to make sure the compiler generate + // same bytecodes for `saveAsTextFile`. + val nullWritableClassTag = implicitly[ClassTag[NullWritable]] + val textClassTag = implicitly[ClassTag[Text]] + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } + RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path) } @@ -1166,7 +1288,17 @@ abstract class RDD[T: ClassTag]( * Save this RDD as a compressed text file, using string representations of elements. */ def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]) { - this.map(x => (NullWritable.get(), new Text(x.toString))) + // https://issues.apache.org/jira/browse/SPARK-2075 + val nullWritableClassTag = implicitly[ClassTag[NullWritable]] + val textClassTag = implicitly[ClassTag[Text]] + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } + RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec) } @@ -1200,7 +1332,7 @@ abstract class RDD[T: ClassTag]( */ def checkpoint() { if (context.checkpointDir.isEmpty) { - throw new Exception("Checkpoint directory has not been set in the SparkContext") + throw new SparkException("Checkpoint directory has not been set in the SparkContext") } else if (checkpointData.isEmpty) { checkpointData = Some(new RDDCheckpointData(this)) checkpointData.get.markForCheckpoint() @@ -1247,7 +1379,7 @@ abstract class RDD[T: ClassTag]( /** * Private API for changing an RDD's ClassTag. - * Used for internal Java <-> Scala API compatibility. + * Used for internal Java-Scala API compatibility. */ private[spark] def retag(cls: Class[T]): RDD[T] = { val classTag: ClassTag[T] = ClassTag.apply(cls) @@ -1256,7 +1388,7 @@ abstract class RDD[T: ClassTag]( /** * Private API for changing an RDD's ClassTag. - * Used for internal Java <-> Scala API compatibility. + * Used for internal Java-Scala API compatibility. 
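The new isEmpty() above decides emptiness from the data, not from the partition count. A quick illustration (assumes an existing SparkContext named sc):

    val emptyWithPartitions = sc.parallelize(Seq.empty[Int], 4)   // 4 partitions, no elements
    val nonEmpty = sc.parallelize(Seq(1, 2), 2)

    emptyWithPartitions.isEmpty()   // true: take(1) finds nothing, despite the 4 partitions
    nonEmpty.isEmpty()              // false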
*/ private[spark] def retag(implicit classTag: ClassTag[T]): RDD[T] = { this.mapPartitions(identity, preservesPartitioning = true)(classTag) @@ -1307,7 +1439,7 @@ abstract class RDD[T: ClassTag]( def debugSelf (rdd: RDD[_]): Seq[String] = { import Utils.bytesToString - val persistence = storageLevel.description + val persistence = if (storageLevel != StorageLevel.NONE) storageLevel.description else "" val storageInfo = rdd.context.getRDDStorageInfo.filter(_.id == rdd.id).map(info => " CachedPartitions: %d; MemorySize: %s; TachyonSize: %s; DiskSize: %s".format( info.numCachedPartitions, bytesToString(info.memSize), @@ -1381,3 +1513,52 @@ abstract class RDD[T: ClassTag]( new JavaRDD(this)(elementClassTag) } } + + +/** + * Defines implicit functions that provide extra functionalities on RDDs of specific types. + * + * For example, [[RDD.rddToPairRDDFunctions]] converts an RDD into a [[PairRDDFunctions]] for + * key-value-pair RDDs, and enabling extra functionalities such as [[PairRDDFunctions.reduceByKey]]. + */ +object RDD { + + // The following implicit functions were in SparkContext before 1.3 and users had to + // `import SparkContext._` to enable them. Now we move them here to make the compiler find + // them automatically. However, we still keep the old functions in SparkContext for backward + // compatibility and forward to the following functions directly. + + implicit def rddToPairRDDFunctions[K, V](rdd: RDD[(K, V)]) + (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null): PairRDDFunctions[K, V] = { + new PairRDDFunctions(rdd) + } + + implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]): AsyncRDDActions[T] = { + new AsyncRDDActions(rdd) + } + + implicit def rddToSequenceFileRDDFunctions[K, V](rdd: RDD[(K, V)]) + (implicit kt: ClassTag[K], vt: ClassTag[V], + keyWritableFactory: WritableFactory[K], + valueWritableFactory: WritableFactory[V]) + : SequenceFileRDDFunctions[K, V] = { + implicit val keyConverter = keyWritableFactory.convert + implicit val valueConverter = valueWritableFactory.convert + new SequenceFileRDDFunctions(rdd, + keyWritableFactory.writableClass(kt), valueWritableFactory.writableClass(vt)) + } + + implicit def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag](rdd: RDD[(K, V)]) + : OrderedRDDFunctions[K, V, (K, V)] = { + new OrderedRDDFunctions[K, V, (K, V)](rdd) + } + + implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]): DoubleRDDFunctions = { + new DoubleRDDFunctions(rdd) + } + + implicit def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) + : DoubleRDDFunctions = { + new DoubleRDDFunctions(rdd.map(x => num.toDouble(x))) + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala index b097c30f8c231..9e8cee5331cf8 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala @@ -21,8 +21,7 @@ import java.util.Random import scala.reflect.ClassTag -import cern.jet.random.Poisson -import cern.jet.random.engine.DRand +import org.apache.commons.math3.distribution.PoissonDistribution import org.apache.spark.{Partition, TaskContext} @@ -53,9 +52,11 @@ private[spark] class SampledRDD[T: ClassTag]( if (withReplacement) { // For large datasets, the expected number of occurrences of each element in a sample with // replacement is Poisson(frac). We use that to get a count for each element. 
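Because the conversions above now live on the RDD companion object, they are in implicit scope wherever an RDD is used, so pair-RDD methods resolve without the old SparkContext._ import. A small example of what that enables (assumes an existing SparkContext named sc):

    import org.apache.spark.rdd.RDD   // no `import org.apache.spark.SparkContext._` needed

    val counts: RDD[(String, Int)] = sc.parallelize(Seq("a", "b", "a"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)             // resolved via RDD.rddToPairRDDFunctions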
- val poisson = new Poisson(frac, new DRand(split.seed)) + val poisson = new PoissonDistribution(frac) + poisson.reseedRandomGenerator(split.seed) + firstParent[T].iterator(split.prev, context).flatMap { element => - val count = poisson.nextInt() + val count = poisson.sample() if (count == 0) { Iterator.empty // Avoid object allocation when we return 0 items, which is quite often } else { diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 9a1efc83cbe6a..059f8963691f0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -24,20 +24,41 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.SequenceFileOutputFormat import org.apache.spark.Logging -import org.apache.spark.SparkContext._ /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, * through an implicit conversion. Note that this can't be part of PairRDDFunctions because * we need more implicit parameters to convert our keys and values to Writable. * - * Import `org.apache.spark.SparkContext._` at the top of their program to use these functions. */ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( - self: RDD[(K, V)]) + self: RDD[(K, V)], + _keyWritableClass: Class[_ <: Writable], + _valueWritableClass: Class[_ <: Writable]) extends Logging with Serializable { + @deprecated("It's used to provide backward compatibility for pre 1.3.0.", "1.3.0") + def this(self: RDD[(K, V)]) { + this(self, null, null) + } + + private val keyWritableClass = + if (_keyWritableClass == null) { + // pre 1.3.0, we need to use Reflection to get the Writable class + getWritableClass[K]() + } else { + _keyWritableClass + } + + private val valueWritableClass = + if (_valueWritableClass == null) { + // pre 1.3.0, we need to use Reflection to get the Writable class + getWritableClass[V]() + } else { + _valueWritableClass + } + private def getWritableClass[T <% Writable: ClassTag](): Class[_ <: Writable] = { val c = { if (classOf[Writable].isAssignableFrom(classTag[T].runtimeClass)) { @@ -56,6 +77,7 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag c.asInstanceOf[Class[_ <: Writable]] } + /** * Output the RDD as a Hadoop SequenceFile using the Writable types we infer from the RDD's key * and value types. If the key or value are Writable, then we use their classes directly; @@ -66,26 +88,28 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag def saveAsSequenceFile(path: String, codec: Option[Class[_ <: CompressionCodec]] = None) { def anyToWritable[U <% Writable](u: U): Writable = u - val keyClass = getWritableClass[K] - val valueClass = getWritableClass[V] - val convertKey = !classOf[Writable].isAssignableFrom(self.keyClass) - val convertValue = !classOf[Writable].isAssignableFrom(self.valueClass) + // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and + // valueWritableClass at the compile time. To implement that, we need to add type parameters to + // SequenceFileRDDFunctions. however, SequenceFileRDDFunctions is a public class so it will be a + // breaking change. 
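The commons-math3 PoissonDistribution used above in place of colt's Poisson is seeded per split so that sampling stays reproducible. A standalone sketch of that API (the fraction and seed values are illustrative):

    import org.apache.commons.math3.distribution.PoissonDistribution

    val frac = 0.3                          // expected occurrences of each element
    val poisson = new PoissonDistribution(frac)
    poisson.reseedRandomGenerator(42L)      // the real code uses the split's seed

    val counts = Seq.fill(5)(poisson.sample())   // e.g. Seq(0, 1, 0, 0, 2); values vary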
+ val convertKey = self.keyClass != keyWritableClass + val convertValue = self.valueClass != valueWritableClass - logInfo("Saving as sequence file of type (" + keyClass.getSimpleName + "," + - valueClass.getSimpleName + ")" ) + logInfo("Saving as sequence file of type (" + keyWritableClass.getSimpleName + "," + + valueWritableClass.getSimpleName + ")" ) val format = classOf[SequenceFileOutputFormat[Writable, Writable]] val jobConf = new JobConf(self.context.hadoopConfiguration) if (!convertKey && !convertValue) { - self.saveAsHadoopFile(path, keyClass, valueClass, format, jobConf, codec) + self.saveAsHadoopFile(path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (!convertKey && convertValue) { self.map(x => (x._1,anyToWritable(x._2))).saveAsHadoopFile( - path, keyClass, valueClass, format, jobConf, codec) + path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (convertKey && !convertValue) { self.map(x => (anyToWritable(x._1),x._2)).saveAsHadoopFile( - path, keyClass, valueClass, format, jobConf, codec) + path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (convertKey && convertValue) { self.map(x => (anyToWritable(x._1),anyToWritable(x._2))).saveAsHadoopFile( - path, keyClass, valueClass, format, jobConf, codec) + path, keyWritableClass, valueWritableClass, format, jobConf, codec) } } } diff --git a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index 0c97eb0aaa51f..aece683ff3199 100644 --- a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -24,6 +24,7 @@ import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.util.Utils /** * Partition for UnionRDD. 
@@ -48,7 +49,7 @@ private[spark] class UnionPartition[T: ClassTag]( override val index: Int = idx @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index f3d30f6c9b32f..95b2dd954e9f4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -22,6 +22,7 @@ import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} +import org.apache.spark.util.Utils private[spark] class ZippedPartitionsPartition( idx: Int, @@ -34,7 +35,7 @@ private[spark] class ZippedPartitionsPartition( def partitions = partitionValues @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { + private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization partitionValues = rdds.map(rdd => rdd.partitions(idx)) oos.defaultWriteObject() @@ -76,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag]( private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B]) => Iterator[V], + var f: (Iterator[A], Iterator[B]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], preservesPartitioning: Boolean = false) @@ -91,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag] super.clearDependencies() rdd1 = null rdd2 = null + f = null } } private[spark] class ZippedPartitionsRDD3 [A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], + var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -116,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3 rdd1 = null rdd2 = null rdd3 = null + f = null } } private[spark] class ZippedPartitionsRDD4 [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], + var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -144,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4 rdd2 = null rdd3 = null rdd4 = null + f = null } } diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala index e2c301603b4a5..8c43a559409f2 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala @@ -39,21 +39,24 @@ class ZippedWithIndexRDDPartition(val prev: Partition, val startIndex: Long) private[spark] class ZippedWithIndexRDD[T: ClassTag](@transient prev: RDD[T]) extends RDD[(T, Long)](prev) { - override def getPartitions: Array[Partition] = { + /** The start index of each partition. 
*/ + @transient private val startIndices: Array[Long] = { val n = prev.partitions.size - val startIndices: Array[Long] = - if (n == 0) { - Array[Long]() - } else if (n == 1) { - Array(0L) - } else { - prev.context.runJob( - prev, - Utils.getIteratorSize _, - 0 until n - 1, // do not need to count the last partition - false - ).scanLeft(0L)(_ + _) - } + if (n == 0) { + Array[Long]() + } else if (n == 1) { + Array(0L) + } else { + prev.context.runJob( + prev, + Utils.getIteratorSize _, + 0 until n - 1, // do not need to count the last partition + allowLocal = false + ).scanLeft(0L)(_ + _) + } + } + + override def getPartitions: Array[Partition] = { firstParent[T].partitions.map(x => new ZippedWithIndexRDDPartition(x, startIndices(x.index))) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index f81fa6d8089fc..1cfe98673773a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -19,6 +19,7 @@ package org.apache.spark.scheduler import java.io.NotSerializableException import java.util.Properties +import java.util.concurrent.{TimeUnit, Executors} import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map, Stack} @@ -28,8 +29,6 @@ import scala.language.postfixOps import scala.reflect.ClassTag import scala.util.control.NonFatal -import akka.actor._ -import akka.actor.SupervisorStrategy.Stop import akka.pattern.ask import akka.util.Timeout @@ -39,7 +38,7 @@ import org.apache.spark.executor.TaskMetrics import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult} import org.apache.spark.rdd.RDD import org.apache.spark.storage._ -import org.apache.spark.util.{CallSite, SystemClock, Clock, Utils} +import org.apache.spark.util.{CallSite, EventLoop, SystemClock, Clock, Utils} import org.apache.spark.storage.BlockManagerMessages.BlockManagerHeartbeat /** @@ -67,8 +66,6 @@ class DAGScheduler( clock: Clock = SystemClock) extends Logging { - import DAGScheduler._ - def this(sc: SparkContext, taskScheduler: TaskScheduler) = { this( sc, @@ -112,38 +109,31 @@ class DAGScheduler( // stray messages to detect. private val failedEpoch = new HashMap[String, Long] - private val dagSchedulerActorSupervisor = - env.actorSystem.actorOf(Props(new DAGSchedulerActorSupervisor(this))) - // A closure serializer that we reuse. // This is only safe because DAGScheduler runs in a single thread. private val closureSerializer = SparkEnv.get.closureSerializer.newInstance() - private[scheduler] var eventProcessActor: ActorRef = _ /** If enabled, we may run certain actions like take() and first() locally. */ private val localExecutionEnabled = sc.getConf.getBoolean("spark.localExecution.enabled", false) - private def initializeEventProcessActor() { - // blocking the thread until supervisor is started, which ensures eventProcessActor is - // not null before any job is submitted - implicit val timeout = Timeout(30 seconds) - val initEventActorReply = - dagSchedulerActorSupervisor ? Props(new DAGSchedulerEventProcessActor(this)) - eventProcessActor = Await.result(initEventActorReply, timeout.duration). - asInstanceOf[ActorRef] - } + /** If enabled, FetchFailed will not cause stage retry, in order to surface the problem. 
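The startIndices computation above is a prefix sum: counting every partition except the last and scanning the sizes turns per-partition counts into per-partition starting offsets. A small sketch of the idea, plus what zipWithIndex then yields (the sizes are illustrative; assumes an existing SparkContext named sc):

    // Offsets from sizes: partition 0 starts at 0, partition i at the sum of sizes 0..i-1.
    val partitionSizes = Seq(3L, 5L, 2L)
    val startIndices = partitionSizes.dropRight(1).scanLeft(0L)(_ + _)   // Seq(0, 3, 8)

    sc.parallelize(Seq("a", "b", "c", "d"), 2).zipWithIndex().collect()
    // Array(("a", 0), ("b", 1), ("c", 2), ("d", 3))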
*/ + private val disallowStageRetryForTest = sc.getConf.getBoolean("spark.test.noStageRetry", false) - initializeEventProcessActor() + private val messageScheduler = + Executors.newScheduledThreadPool(1, Utils.namedThreadFactory("dag-scheduler-message")) + + private[scheduler] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this) + taskScheduler.setDAGScheduler(this) // Called by TaskScheduler to report task's starting. def taskStarted(task: Task[_], taskInfo: TaskInfo) { - eventProcessActor ! BeginEvent(task, taskInfo) + eventProcessLoop.post(BeginEvent(task, taskInfo)) } // Called to report that a task has completed and results are being fetched remotely. def taskGettingResult(taskInfo: TaskInfo) { - eventProcessActor ! GettingResultEvent(taskInfo) + eventProcessLoop.post(GettingResultEvent(taskInfo)) } // Called by TaskScheduler to report task completions or failures. @@ -154,7 +144,8 @@ class DAGScheduler( accumUpdates: Map[Long, Any], taskInfo: TaskInfo, taskMetrics: TaskMetrics) { - eventProcessActor ! CompletionEvent(task, reason, result, accumUpdates, taskInfo, taskMetrics) + eventProcessLoop.post( + CompletionEvent(task, reason, result, accumUpdates, taskInfo, taskMetrics)) } /** @@ -176,18 +167,18 @@ class DAGScheduler( // Called by TaskScheduler when an executor fails. def executorLost(execId: String) { - eventProcessActor ! ExecutorLost(execId) + eventProcessLoop.post(ExecutorLost(execId)) } // Called by TaskScheduler when a host is added def executorAdded(execId: String, host: String) { - eventProcessActor ! ExecutorAdded(execId, host) + eventProcessLoop.post(ExecutorAdded(execId, host)) } // Called by TaskScheduler to cancel an entire TaskSet due to either repeated failures or // cancellation of the job itself. def taskSetFailed(taskSet: TaskSet, reason: String) { - eventProcessActor ! TaskSetFailed(taskSet, reason) + eventProcessLoop.post(TaskSetFailed(taskSet, reason)) } private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = { @@ -446,7 +437,6 @@ class DAGScheduler( } // data structures based on StageId stageIdToStage -= stageId - logDebug("After removal of stage %d, remaining stages = %d" .format(stageId, stageIdToStage.size)) } @@ -493,8 +483,8 @@ class DAGScheduler( assert(partitions.size > 0) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val waiter = new JobWaiter(this, jobId, partitions.size, resultHandler) - eventProcessActor ! JobSubmitted( - jobId, rdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties) + eventProcessLoop.post(JobSubmitted( + jobId, rdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties)) waiter } @@ -534,8 +524,8 @@ class DAGScheduler( val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until rdd.partitions.size).toArray val jobId = nextJobId.getAndIncrement() - eventProcessActor ! JobSubmitted( - jobId, rdd, func2, partitions, allowLocal = false, callSite, listener, properties) + eventProcessLoop.post(JobSubmitted( + jobId, rdd, func2, partitions, allowLocal = false, callSite, listener, properties)) listener.awaitResult() // Will throw an exception if the job fails } @@ -544,19 +534,19 @@ class DAGScheduler( */ def cancelJob(jobId: Int) { logInfo("Asked to cancel job " + jobId) - eventProcessActor ! JobCancelled(jobId) + eventProcessLoop.post(JobCancelled(jobId)) } def cancelJobGroup(groupId: String) { logInfo("Asked to cancel job group " + groupId) - eventProcessActor ! 
JobGroupCancelled(groupId) + eventProcessLoop.post(JobGroupCancelled(groupId)) } /** * Cancel all jobs that are running or waiting in the queue. */ def cancelAllJobs() { - eventProcessActor ! AllJobsCancelled + eventProcessLoop.post(AllJobsCancelled) } private[scheduler] def doCancelAllJobs() { @@ -572,7 +562,7 @@ class DAGScheduler( * Cancel all jobs associated with a running or scheduled stage. */ def cancelStage(stageId: Int) { - eventProcessActor ! StageCancelled(stageId) + eventProcessLoop.post(StageCancelled(stageId)) } /** @@ -632,8 +622,8 @@ class DAGScheduler( try { val rdd = job.finalStage.rdd val split = rdd.partitions(job.partitions(0)) - val taskContext = - new TaskContextImpl(job.finalStage.id, job.partitions(0), 0, true) + val taskContext = new TaskContextImpl(job.finalStage.id, job.partitions(0), taskAttemptId = 0, + attemptNumber = 0, runningLocally = true) TaskContextHelper.setTaskContext(taskContext) try { val result = job.func(taskContext, rdd.iterator(split, taskContext)) @@ -658,7 +648,7 @@ class DAGScheduler( // completion events or stage abort stageIdToStage -= s.id jobIdToStageIds -= job.jobId - listenerBus.post(SparkListenerJobEnd(job.jobId, jobResult)) + listenerBus.post(SparkListenerJobEnd(job.jobId, clock.getTime(), jobResult)) } } @@ -707,7 +697,7 @@ class DAGScheduler( stage.latestInfo.stageFailed(stageFailedMessage) listenerBus.post(SparkListenerStageCompleted(stage.latestInfo)) } - listenerBus.post(SparkListenerJobEnd(job.jobId, JobFailed(error))) + listenerBus.post(SparkListenerJobEnd(job.jobId, clock.getTime(), JobFailed(error))) } } @@ -746,16 +736,20 @@ class DAGScheduler( logInfo("Missing parents: " + getMissingParentStages(finalStage)) val shouldRunLocally = localExecutionEnabled && allowLocal && finalStage.parents.isEmpty && partitions.length == 1 + val jobSubmissionTime = clock.getTime() if (shouldRunLocally) { // Compute very short actions like first() or take() with no parent stages locally. - listenerBus.post(SparkListenerJobStart(job.jobId, Array[Int](), properties)) + listenerBus.post( + SparkListenerJobStart(job.jobId, jobSubmissionTime, Seq.empty, properties)) runLocally(job) } else { jobIdToActiveJob(jobId) = job activeJobs += job finalStage.resultOfJob = Some(job) - listenerBus.post(SparkListenerJobStart(job.jobId, jobIdToStageIds(jobId).toArray, - properties)) + val stageIds = jobIdToStageIds(jobId).toArray + val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo)) + listenerBus.post( + SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties)) submitStage(finalStage) } } @@ -862,26 +856,6 @@ class DAGScheduler( } if (tasks.size > 0) { - // Preemptively serialize a task to make sure it can be serialized. We are catching this - // exception here because it would be fairly hard to catch the non-serializable exception - // down the road, where we have several different implementations for local scheduler and - // cluster schedulers. - // - // We've already serialized RDDs and closures in taskBinary, but here we check for all other - // objects such as Partition. - try { - closureSerializer.serialize(tasks.head) - } catch { - case e: NotSerializableException => - abortStage(stage, "Task not serializable: " + e.toString) - runningStages -= stage - return - case NonFatal(e) => // Other exceptions, such as IllegalArgumentException from Kryo. 
- abortStage(stage, s"Task serialization failed: $e\n${e.getStackTraceString}") - runningStages -= stage - return - } - logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")") stage.pendingTasks ++= tasks logDebug("New pending tasks: " + stage.pendingTasks) @@ -898,6 +872,34 @@ class DAGScheduler( } } + /** Merge updates from a task to our local accumulator values */ + private def updateAccumulators(event: CompletionEvent): Unit = { + val task = event.task + val stage = stageIdToStage(task.stageId) + if (event.accumUpdates != null) { + try { + Accumulators.add(event.accumUpdates) + event.accumUpdates.foreach { case (id, partialValue) => + val acc = Accumulators.originals(id).asInstanceOf[Accumulable[Any, Any]] + // To avoid UI cruft, ignore cases where value wasn't updated + if (acc.name.isDefined && partialValue != acc.zero) { + val name = acc.name.get + val stringPartialValue = Accumulators.stringifyPartialValue(partialValue) + val stringValue = Accumulators.stringifyValue(acc.value) + stage.latestInfo.accumulables(id) = AccumulableInfo(id, name, stringValue) + event.taskInfo.accumulables += + AccumulableInfo(id, name, Some(stringPartialValue), stringValue) + } + } + } catch { + // If we see an exception during accumulator update, just log the + // error and move on. + case e: Exception => + logError(s"Failed to update accumulators for $task", e) + } + } + } + /** * Responds to a task finishing. This is called inside the event loop so it assumes that it can * modify the scheduler's internal state. Use taskEnded() to post a task end event from outside. @@ -938,27 +940,6 @@ class DAGScheduler( } event.reason match { case Success => - if (event.accumUpdates != null) { - try { - Accumulators.add(event.accumUpdates) - event.accumUpdates.foreach { case (id, partialValue) => - val acc = Accumulators.originals(id).asInstanceOf[Accumulable[Any, Any]] - // To avoid UI cruft, ignore cases where value wasn't updated - if (acc.name.isDefined && partialValue != acc.zero) { - val name = acc.name.get - val stringPartialValue = Accumulators.stringifyPartialValue(partialValue) - val stringValue = Accumulators.stringifyValue(acc.value) - stage.latestInfo.accumulables(id) = AccumulableInfo(id, name, stringValue) - event.taskInfo.accumulables += - AccumulableInfo(id, name, Some(stringPartialValue), stringValue) - } - } - } catch { - // If we see an exception during accumulator update, just log the error and move on. - case e: Exception => - logError(s"Failed to update accumulators for $task", e) - } - } listenerBus.post(SparkListenerTaskEnd(stageId, stage.latestInfo.attemptId, taskType, event.reason, event.taskInfo, event.taskMetrics)) stage.pendingTasks -= task @@ -967,13 +948,15 @@ class DAGScheduler( stage.resultOfJob match { case Some(job) => if (!job.finished(rt.outputId)) { + updateAccumulators(event) job.finished(rt.outputId) = true job.numFinished += 1 // If the whole job has finished, remove it if (job.numFinished == job.numPartitions) { markStageAsFinished(stage) cleanupStateForJobAndIndependentStages(job) - listenerBus.post(SparkListenerJobEnd(job.jobId, JobSucceeded)) + listenerBus.post( + SparkListenerJobEnd(job.jobId, clock.getTime(), JobSucceeded)) } // taskSucceeded runs some user code that might throw an exception. 
Make sure @@ -991,6 +974,7 @@ class DAGScheduler( } case smt: ShuffleMapTask => + updateAccumulators(event) val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) @@ -1050,7 +1034,7 @@ class DAGScheduler( logInfo("Resubmitted " + task + ", so marking it as still running") stage.pendingTasks += task - case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => + case FetchFailed(bmAddress, shuffleId, mapId, reduceId, failureMessage) => val failedStage = stageIdToStage(task.stageId) val mapStage = shuffleToMapStage(shuffleId) @@ -1060,24 +1044,24 @@ class DAGScheduler( if (runningStages.contains(failedStage)) { logInfo(s"Marking $failedStage (${failedStage.name}) as failed " + s"due to a fetch failure from $mapStage (${mapStage.name})") - markStageAsFinished(failedStage, Some("Fetch failure")) + markStageAsFinished(failedStage, Some(failureMessage)) runningStages -= failedStage } - if (failedStages.isEmpty && eventProcessActor != null) { + if (disallowStageRetryForTest) { + abortStage(failedStage, "Fetch failure will not retry stage due to testing config") + } else if (failedStages.isEmpty) { // Don't schedule an event to resubmit failed stages if failed isn't empty, because - // in that case the event will already have been scheduled. eventProcessActor may be - // null during unit tests. + // in that case the event will already have been scheduled. // TODO: Cancel running tasks in the stage - import env.actorSystem.dispatcher logInfo(s"Resubmitting $mapStage (${mapStage.name}) and " + s"$failedStage (${failedStage.name}) due to fetch failure") - env.actorSystem.scheduler.scheduleOnce( - RESUBMIT_TIMEOUT, eventProcessActor, ResubmitFailedStages) + messageScheduler.schedule(new Runnable { + override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) + }, DAGScheduler.RESUBMIT_TIMEOUT, TimeUnit.MILLISECONDS) } failedStages += failedStage failedStages += mapStage - // Mark the map whose fetch failed as broken in the map stage if (mapId != -1) { mapStage.removeOutputLoc(mapId, bmAddress) @@ -1086,10 +1070,10 @@ class DAGScheduler( // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleExecutorLost(bmAddress.executorId, Some(task.epoch)) + handleExecutorLost(bmAddress.executorId, fetchFailed = true, Some(task.epoch)) } - case ExceptionFailure(className, description, stackTrace, metrics) => + case ExceptionFailure(className, description, stackTrace, fullStackTrace, metrics) => // Do nothing here, left up to the TaskScheduler to decide how to handle user failures case TaskResultLost => @@ -1106,25 +1090,35 @@ class DAGScheduler( * Responds to an executor being lost. This is called inside the event loop, so it assumes it can * modify the scheduler's internal state. Use executorLost() to post a loss event from outside. * + * We will also assume that we've lost all shuffle blocks associated with the executor if the + * executor serves its own blocks (i.e., we're not using external shuffle) OR a FetchFailed + * occurred, in which case we presume all shuffle data related to this executor to be lost. + * * Optionally the epoch during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. 
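The delayed resubmission above now goes through a plain ScheduledExecutorService instead of the Akka scheduler. A stripped-down sketch of that mechanism (the Runnable body here just prints; the real code posts ResubmitFailedStages to the event loop):

    import java.util.concurrent.{Executors, TimeUnit}

    val messageScheduler = Executors.newScheduledThreadPool(1)
    val RESUBMIT_TIMEOUT = 200L   // milliseconds, as in DAGScheduler.RESUBMIT_TIMEOUT

    messageScheduler.schedule(new Runnable {
      override def run(): Unit = println("resubmitting failed stages")
    }, RESUBMIT_TIMEOUT, TimeUnit.MILLISECONDS)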
*/ - private[scheduler] def handleExecutorLost(execId: String, maybeEpoch: Option[Long] = None) { + private[scheduler] def handleExecutorLost( + execId: String, + fetchFailed: Boolean, + maybeEpoch: Option[Long] = None) { val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch) if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) { failedEpoch(execId) = currentEpoch logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch)) blockManagerMaster.removeExecutor(execId) - // TODO: This will be really slow if we keep accumulating shuffle map stages - for ((shuffleId, stage) <- shuffleToMapStage) { - stage.removeOutputsOnExecutor(execId) - val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray - mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true) - } - if (shuffleToMapStage.isEmpty) { - mapOutputTracker.incrementEpoch() + + if (!env.blockManager.externalShuffleServiceEnabled || fetchFailed) { + // TODO: This will be really slow if we keep accumulating shuffle map stages + for ((shuffleId, stage) <- shuffleToMapStage) { + stage.removeOutputsOnExecutor(execId) + val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray + mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true) + } + if (shuffleToMapStage.isEmpty) { + mapOutputTracker.incrementEpoch() + } + clearCacheLocs() } - clearCacheLocs() } else { logDebug("Additional executor lost message for " + execId + "(epoch " + currentEpoch + ")") @@ -1230,7 +1224,7 @@ class DAGScheduler( if (ableToCancelStages) { job.listener.jobFailed(error) cleanupStateForJobAndIndependentStages(job) - listenerBus.post(SparkListenerJobEnd(job.jobId, JobFailed(error))) + listenerBus.post(SparkListenerJobEnd(job.jobId, clock.getTime(), JobFailed(error))) } } @@ -1322,46 +1316,21 @@ class DAGScheduler( def stop() { logInfo("Stopping DAGScheduler") - dagSchedulerActorSupervisor ! PoisonPill + eventProcessLoop.stop() taskScheduler.stop() } -} - -private[scheduler] class DAGSchedulerActorSupervisor(dagScheduler: DAGScheduler) - extends Actor with Logging { - - override val supervisorStrategy = - OneForOneStrategy() { - case x: Exception => - logError("eventProcesserActor failed; shutting down SparkContext", x) - try { - dagScheduler.doCancelAllJobs() - } catch { - case t: Throwable => logError("DAGScheduler failed to cancel all jobs.", t) - } - dagScheduler.sc.stop() - Stop - } - def receive = { - case p: Props => sender ! context.actorOf(p) - case _ => logWarning("received unknown message in DAGSchedulerActorSupervisor") - } + // Start the event thread at the end of the constructor + eventProcessLoop.start() } -private[scheduler] class DAGSchedulerEventProcessActor(dagScheduler: DAGScheduler) - extends Actor with Logging { - - override def preStart() { - // set DAGScheduler for taskScheduler to ensure eventProcessActor is always - // valid when the messages arrive - dagScheduler.taskScheduler.setDAGScheduler(dagScheduler) - } +private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler) + extends EventLoop[DAGSchedulerEvent]("dag-scheduler-event-loop") with Logging { /** * The main event loop of the DAG scheduler. 
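DAGSchedulerEventProcessLoop above replaces the actor with an EventLoop: a queue drained by a dedicated thread that dispatches to onReceive. The sketch below is an illustration of that pattern under simplified assumptions, not Spark's actual EventLoop implementation:

    import java.util.concurrent.LinkedBlockingQueue

    abstract class SimpleEventLoop[E](name: String) {
      private val queue = new LinkedBlockingQueue[E]()
      @volatile private var stopped = false

      private val thread = new Thread(name) {
        setDaemon(true)
        override def run(): Unit = {
          try {
            while (!stopped) {
              onReceive(queue.take())   // blocks until an event is posted
            }
          } catch {
            case _: InterruptedException =>   // stop() interrupts the blocking take()
          }
        }
      }

      def post(event: E): Unit = queue.put(event)
      def start(): Unit = thread.start()
      def stop(): Unit = { stopped = true; thread.interrupt() }

      protected def onReceive(event: E): Unit
    }

    // e.g. new SimpleEventLoop[String]("demo") { def onReceive(e: String): Unit = println(e) }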
*/ - def receive = { + override def onReceive(event: DAGSchedulerEvent): Unit = event match { case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) => dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) @@ -1382,7 +1351,7 @@ private[scheduler] class DAGSchedulerEventProcessActor(dagScheduler: DAGSchedule dagScheduler.handleExecutorAdded(execId, host) case ExecutorLost(execId) => - dagScheduler.handleExecutorLost(execId) + dagScheduler.handleExecutorLost(execId, fetchFailed = false) case BeginEvent(task, taskInfo) => dagScheduler.handleBeginEvent(task, taskInfo) @@ -1400,7 +1369,17 @@ private[scheduler] class DAGSchedulerEventProcessActor(dagScheduler: DAGSchedule dagScheduler.resubmitFailedStages() } - override def postStop() { + override def onError(e: Throwable): Unit = { + logError("DAGSchedulerEventProcessLoop failed; shutting down SparkContext", e) + try { + dagScheduler.doCancelAllJobs() + } catch { + case t: Throwable => logError("DAGScheduler failed to cancel all jobs.", t) + } + dagScheduler.sc.stop() + } + + override def onStop() { // Cancel any active jobs in postStop hook dagScheduler.cleanUpAfterSchedulerStop() } @@ -1410,9 +1389,5 @@ private[spark] object DAGScheduler { // The time, in millis, to wait for fetch failure events to stop coming in after one is detected; // this is a simplistic way to avoid resubmitting tasks in the non-fetchable map stage one by one // as more failure events come in - val RESUBMIT_TIMEOUT = 200.milliseconds - - // The time, in millis, to wake up between polls of the completion queue in order to potentially - // resubmit failed stages - val POLL_TIMEOUT = 10L + val RESUBMIT_TIMEOUT = 200 } diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 100c9ba9b7809..30075c172bdb1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -17,20 +17,23 @@ package org.apache.spark.scheduler +import java.io._ +import java.net.URI + import scala.collection.mutable import scala.collection.mutable.ArrayBuffer +import com.google.common.base.Charsets import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} import org.apache.hadoop.fs.permission.FsPermission import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods._ -import org.apache.spark.{Logging, SparkConf, SparkContext} +import org.apache.spark.{Logging, SparkConf, SPARK_VERSION} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec -import org.apache.spark.SPARK_VERSION -import org.apache.spark.util.{FileLogger, JsonProtocol, Utils} +import org.apache.spark.util.{JsonProtocol, Utils} /** * A SparkListener that logs events to persistent storage. 
@@ -58,36 +61,78 @@ private[spark] class EventLoggingListener( private val shouldOverwrite = sparkConf.getBoolean("spark.eventLog.overwrite", false) private val testing = sparkConf.getBoolean("spark.eventLog.testing", false) private val outputBufferSize = sparkConf.getInt("spark.eventLog.buffer.kb", 100) * 1024 - val logDir = EventLoggingListener.getLogDirPath(logBaseDir, appId) - val logDirName: String = logDir.split("/").last - protected val logger = new FileLogger(logDir, sparkConf, hadoopConf, outputBufferSize, - shouldCompress, shouldOverwrite, Some(LOG_FILE_PERMISSIONS)) + private val fileSystem = Utils.getHadoopFileSystem(new URI(logBaseDir), hadoopConf) + + // Only defined if the file system scheme is not local + private var hadoopDataStream: Option[FSDataOutputStream] = None + + // The Hadoop APIs have changed over time, so we use reflection to figure out + // the correct method to use to flush a hadoop data stream. See SPARK-1518 + // for details. + private val hadoopFlushMethod = { + val cls = classOf[FSDataOutputStream] + scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync")) + } + + private var writer: Option[PrintWriter] = None // For testing. Keep track of all JSON serialized events that have been logged. private[scheduler] val loggedEvents = new ArrayBuffer[JValue] + // Visible for tests only. + private[scheduler] val logPath = getLogPath(logBaseDir, appId) + /** - * Begin logging events. - * If compression is used, log a file that indicates which compression library is used. + * Creates the log file in the configured log directory. */ def start() { - logger.start() - logInfo("Logging events to %s".format(logDir)) - if (shouldCompress) { - val codec = - sparkConf.get("spark.io.compression.codec", CompressionCodec.DEFAULT_COMPRESSION_CODEC) - logger.newFile(COMPRESSION_CODEC_PREFIX + codec) + if (!fileSystem.isDirectory(new Path(logBaseDir))) { + throw new IllegalArgumentException(s"Log directory $logBaseDir does not exist.") } - logger.newFile(SPARK_VERSION_PREFIX + SPARK_VERSION) - logger.newFile(LOG_PREFIX + logger.fileIndex) + + val workingPath = logPath + IN_PROGRESS + val uri = new URI(workingPath) + val path = new Path(workingPath) + val defaultFs = FileSystem.getDefaultUri(hadoopConf).getScheme + val isDefaultLocal = defaultFs == null || defaultFs == "file" + + if (shouldOverwrite && fileSystem.exists(path)) { + logWarning(s"Event log $path already exists. Overwriting...") + fileSystem.delete(path, true) + } + + /* The Hadoop LocalFileSystem (r1.0.4) has known issues with syncing (HADOOP-7844). + * Therefore, for local files, use FileOutputStream instead. */ + val dstream = + if ((isDefaultLocal && uri.getScheme == null) || uri.getScheme == "file") { + new FileOutputStream(uri.getPath) + } else { + hadoopDataStream = Some(fileSystem.create(path)) + hadoopDataStream.get + } + + val compressionCodec = + if (shouldCompress) { + Some(CompressionCodec.createCodec(sparkConf)) + } else { + None + } + + fileSystem.setPermission(path, LOG_FILE_PERMISSIONS) + val logStream = initEventLog(new BufferedOutputStream(dstream, outputBufferSize), + compressionCodec) + writer = Some(new PrintWriter(logStream)) + + logInfo("Logging events to %s".format(logPath)) } /** Log the event as JSON. 
*/ private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) { val eventJson = JsonProtocol.sparkEventToJson(event) - logger.logLine(compact(render(eventJson))) + writer.foreach(_.println(compact(render(eventJson)))) if (flushLogger) { - logger.flush() + writer.foreach(_.flush()) + hadoopDataStream.foreach(hadoopFlushMethod.invoke(_)) } if (testing) { loggedEvents += eventJson @@ -123,130 +168,168 @@ private[spark] class EventLoggingListener( logEvent(event, flushLogger = true) override def onApplicationEnd(event: SparkListenerApplicationEnd) = logEvent(event, flushLogger = true) + override def onExecutorAdded(event: SparkListenerExecutorAdded) = + logEvent(event, flushLogger = true) + override def onExecutorRemoved(event: SparkListenerExecutorRemoved) = + logEvent(event, flushLogger = true) + // No-op because logging every update would be overkill override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate) { } /** - * Stop logging events. - * In addition, create an empty special file to indicate application completion. + * Stop logging events. The event log file will be renamed so that it loses the + * ".inprogress" suffix. */ def stop() = { - logger.newFile(APPLICATION_COMPLETE) - logger.stop() + writer.foreach(_.close()) + + val target = new Path(logPath) + if (fileSystem.exists(target)) { + if (shouldOverwrite) { + logWarning(s"Event log $target already exists. Overwriting...") + fileSystem.delete(target, true) + } else { + throw new IOException("Target log file already exists (%s)".format(logPath)) + } + } + fileSystem.rename(new Path(logPath + IN_PROGRESS), target) } + } private[spark] object EventLoggingListener extends Logging { + // Suffix applied to the names of files still being written by applications. + val IN_PROGRESS = ".inprogress" val DEFAULT_LOG_DIR = "/tmp/spark-events" - val LOG_PREFIX = "EVENT_LOG_" - val SPARK_VERSION_PREFIX = "SPARK_VERSION_" - val COMPRESSION_CODEC_PREFIX = "COMPRESSION_CODEC_" - val APPLICATION_COMPLETE = "APPLICATION_COMPLETE" - val LOG_FILE_PERMISSIONS = FsPermission.createImmutable(Integer.parseInt("770", 8).toShort) - // A cache for compression codecs to avoid creating the same codec many times - private val codecMap = new mutable.HashMap[String, CompressionCodec] + private val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) - def isEventLogFile(fileName: String): Boolean = { - fileName.startsWith(LOG_PREFIX) - } + // Marker for the end of header data in a log file. After this marker, log data, potentially + // compressed, will be found. + private val HEADER_END_MARKER = "=== LOG_HEADER_END ===" - def isSparkVersionFile(fileName: String): Boolean = { - fileName.startsWith(SPARK_VERSION_PREFIX) - } + // To avoid corrupted files causing the heap to fill up. Value is arbitrary. + private val MAX_HEADER_LINE_LENGTH = 4096 - def isCompressionCodecFile(fileName: String): Boolean = { - fileName.startsWith(COMPRESSION_CODEC_PREFIX) - } + // A cache for compression codecs to avoid creating the same codec many times + private val codecMap = new mutable.HashMap[String, CompressionCodec] - def isApplicationCompleteFile(fileName: String): Boolean = { - fileName == APPLICATION_COMPLETE - } + /** + * Write metadata about the event log to the given stream. + * + * The header is a serialized version of a map, except it does not use Java serialization to + * avoid incompatibilities between different JDKs. It writes one map entry per line, in + * "key=value" format. 
+ * + * The very last entry in the header is the `HEADER_END_MARKER` marker, so that the parsing code + * can know when to stop. + * + * The format needs to be kept in sync with the openEventLog() method below. Also, it cannot + * change in new Spark versions without some other way of detecting the change (like some + * metadata encoded in the file name). + * + * @param logStream Raw output stream to the event log file. + * @param compressionCodec Optional compression codec to use. + * @return A stream to which event log data can be written. This may be a wrapper around the original + * stream (for example, when compression is enabled). + */ + def initEventLog( + logStream: OutputStream, + compressionCodec: Option[CompressionCodec]): OutputStream = { + val meta = mutable.HashMap(("version" -> SPARK_VERSION)) + compressionCodec.foreach { codec => + meta += ("compressionCodec" -> codec.getClass().getName()) + } - def parseSparkVersion(fileName: String): String = { - if (isSparkVersionFile(fileName)) { - fileName.replaceAll(SPARK_VERSION_PREFIX, "") - } else "" - } + def write(entry: String) = { + val bytes = entry.getBytes(Charsets.UTF_8) + if (bytes.length > MAX_HEADER_LINE_LENGTH) { + throw new IOException(s"Header entry too long: ${entry}") + } + logStream.write(bytes, 0, bytes.length) + } - def parseCompressionCodec(fileName: String): String = { - if (isCompressionCodecFile(fileName)) { - fileName.replaceAll(COMPRESSION_CODEC_PREFIX, "") - } else "" + meta.foreach { case (k, v) => write(s"$k=$v\n") } + write(s"$HEADER_END_MARKER\n") + compressionCodec.map(_.compressedOutputStream(logStream)).getOrElse(logStream) } /** - * Return a file-system-safe path to the log directory for the given application. + * Return a file-system-safe path to the log file for the given application. * - * @param logBaseDir A base directory for the path to the log directory for given application. + * @param logBaseDir Directory where the log file will be written. * @param appId A unique app ID. * @return A path which consists of file-system-safe characters. */ - def getLogDirPath(logBaseDir: String, appId: String): String = { + def getLogPath(logBaseDir: String, appId: String): String = { val name = appId.replaceAll("[ :/]", "-").replaceAll("[${}'\"]", "_").toLowerCase Utils.resolveURI(logBaseDir) + "/" + name.stripSuffix("/") } /** - * Parse the event logging information associated with the logs in the given directory. + * Opens an event log file and returns an input stream to the event data. * - * Specifically, this looks for event log files, the Spark version file, the compression - * codec file (if event logs are compressed), and the application completion file (if the - * application has run to completion). + * @return 2-tuple (event input stream, Spark version of event data) */ - def parseLoggingInfo(logDir: Path, fileSystem: FileSystem): EventLoggingInfo = { + def openEventLog(log: Path, fs: FileSystem): (InputStream, String) = { + // It's not clear whether FileSystem.open() throws FileNotFoundException or just plain + // IOException when a file does not exist, so try our best to throw a proper exception. + if (!fs.exists(log)) { + throw new FileNotFoundException(s"File $log does not exist.") + } + + val in = new BufferedInputStream(fs.open(log)) + // Read a single line from the input stream without buffering. + // We cannot use BufferedReader because we must avoid reading + // beyond the end of the header, after which the content of the + // file may be compressed.
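// Illustrative sketch (the version and codec values below are hypothetical): the stream
// produced by initEventLog() begins with a plain-text header of "key=value" entries,
// terminated by HEADER_END_MARKER and followed by the JSON-serialized events, e.g.
//
//   version=1.2.0
//   compressionCodec=org.apache.spark.io.LZFCompressionCodec
//   === LOG_HEADER_END ===
//   {"Event":"SparkListenerApplicationStart", ...}
//
// Only the bytes after the marker may be compressed, which is why readLine() below stops
// exactly at each newline instead of delegating to a BufferedReader.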
+ def readLine(): String = { + val bytes = new ByteArrayOutputStream() + var next = in.read() + var count = 0 + while (next != '\n') { + if (next == -1) { + throw new IOException("Unexpected end of file.") + } + bytes.write(next) + count = count + 1 + if (count > MAX_HEADER_LINE_LENGTH) { + throw new IOException("Maximum header line length exceeded.") + } + next = in.read() + } + new String(bytes.toByteArray(), Charsets.UTF_8) + } + + // Parse the header metadata in the form of k=v pairs + // This assumes that every line before the header end marker follows this format try { - val fileStatuses = fileSystem.listStatus(logDir) - val filePaths = - if (fileStatuses != null) { - fileStatuses.filter(!_.isDir).map(_.getPath).toSeq - } else { - Seq[Path]() + val meta = new mutable.HashMap[String, String]() + var foundEndMarker = false + while (!foundEndMarker) { + readLine() match { + case HEADER_END_MARKER => + foundEndMarker = true + case entry => + val prop = entry.split("=", 2) + if (prop.length != 2) { + throw new IllegalArgumentException("Invalid metadata in log file.") + } + meta += (prop(0) -> prop(1)) } - if (filePaths.isEmpty) { - logWarning("No files found in logging directory %s".format(logDir)) } - EventLoggingInfo( - logPaths = filePaths.filter { path => isEventLogFile(path.getName) }, - sparkVersion = filePaths - .find { path => isSparkVersionFile(path.getName) } - .map { path => parseSparkVersion(path.getName) } - .getOrElse(""), - compressionCodec = filePaths - .find { path => isCompressionCodecFile(path.getName) } - .map { path => - val codec = EventLoggingListener.parseCompressionCodec(path.getName) - val conf = new SparkConf - conf.set("spark.io.compression.codec", codec) - codecMap.getOrElseUpdate(codec, CompressionCodec.createCodec(conf)) - }, - applicationComplete = filePaths.exists { path => isApplicationCompleteFile(path.getName) } - ) + + val sparkVersion = meta.get("version").getOrElse( + throw new IllegalArgumentException("Missing Spark version in log metadata.")) + val codec = meta.get("compressionCodec").map { codecName => + codecMap.getOrElseUpdate(codecName, CompressionCodec.createCodec(new SparkConf, codecName)) + } + (codec.map(_.compressedInputStream(in)).getOrElse(in), sparkVersion) } catch { case e: Exception => - logError("Exception in parsing logging info from directory %s".format(logDir), e) - EventLoggingInfo.empty + in.close() + throw e } } - /** - * Parse the event logging information associated with the logs in the given directory. - */ - def parseLoggingInfo(logDir: String, fileSystem: FileSystem): EventLoggingInfo = { - parseLoggingInfo(new Path(logDir), fileSystem) - } -} - - -/** - * Information needed to process the event logs associated with an application. 
- */ -private[spark] case class EventLoggingInfo( - logPaths: Seq[Path], - sparkVersion: String, - compressionCodec: Option[CompressionCodec], - applicationComplete: Boolean = false) - -private[spark] object EventLoggingInfo { - def empty = EventLoggingInfo(Seq[Path](), "", None, applicationComplete = false) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala index 54904bffdf10b..3bb54855bae44 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -158,6 +158,11 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener " INPUT_BYTES=" + metrics.bytesRead case None => "" } + val outputMetrics = taskMetrics.outputMetrics match { + case Some(metrics) => + " OUTPUT_BYTES=" + metrics.bytesWritten + case None => "" + } val shuffleReadMetrics = taskMetrics.shuffleReadMetrics match { case Some(metrics) => " BLOCK_FETCHED_TOTAL=" + metrics.totalBlocksFetched + @@ -173,7 +178,7 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener " SHUFFLE_WRITE_TIME=" + metrics.shuffleWriteTime case None => "" } - stageLogInfo(stageId, status + info + executorRunTime + gcTime + inputMetrics + + stageLogInfo(stageId, status + info + executorRunTime + gcTime + inputMetrics + outputMetrics + shuffleReadMetrics + writeMetrics) } @@ -215,7 +220,7 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId + " STAGE_ID=" + taskEnd.stageId stageLogInfo(taskEnd.stageId, taskStatus) - case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => + case FetchFailed(bmAddress, shuffleId, mapId, reduceId, message) => taskStatus += " STATUS=FETCHFAILED TID=" + taskInfo.taskId + " STAGE_ID=" + taskEnd.stageId + " SHUFFLE_ID=" + shuffleId + " MAP_ID=" + mapId + " REDUCE_ID=" + reduceId diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala index 36a6e6338faa6..be23056e7d423 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala @@ -17,10 +17,9 @@ package org.apache.spark.scheduler -import java.util.concurrent.{LinkedBlockingQueue, Semaphore} +import java.util.concurrent.atomic.AtomicBoolean -import org.apache.spark.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.AsynchronousListenerBus /** * Asynchronously passes SparkListenerEvents to registered SparkListeners. @@ -29,113 +28,19 @@ import org.apache.spark.util.Utils * has started will events be actually propagated to all attached listeners. This listener bus * is stopped when it receives a SparkListenerShutdown event, which is posted using stop(). */ -private[spark] class LiveListenerBus extends SparkListenerBus with Logging { - - /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than - * an OOM exception) if it's perpetually being added to more quickly than it's being drained. 
*/ - private val EVENT_QUEUE_CAPACITY = 10000 - private val eventQueue = new LinkedBlockingQueue[SparkListenerEvent](EVENT_QUEUE_CAPACITY) - private var queueFullErrorMessageLogged = false - private var started = false - - // A counter that represents the number of events produced and consumed in the queue - private val eventLock = new Semaphore(0) - - private val listenerThread = new Thread("SparkListenerBus") { - setDaemon(true) - override def run(): Unit = Utils.logUncaughtExceptions { - while (true) { - eventLock.acquire() - // Atomically remove and process this event - LiveListenerBus.this.synchronized { - val event = eventQueue.poll - if (event == SparkListenerShutdown) { - // Get out of the while loop and shutdown the daemon thread - return - } - Option(event).foreach(postToAll) - } - } - } - } - - /** - * Start sending events to attached listeners. - * - * This first sends out all buffered events posted before this listener bus has started, then - * listens for any additional events asynchronously while the listener bus is still running. - * This should only be called once. - */ - def start() { - if (started) { - throw new IllegalStateException("Listener bus already started!") +private[spark] class LiveListenerBus + extends AsynchronousListenerBus[SparkListener, SparkListenerEvent]("SparkListenerBus") + with SparkListenerBus { + + private val logDroppedEvent = new AtomicBoolean(false) + + override def onDropEvent(event: SparkListenerEvent): Unit = { + if (logDroppedEvent.compareAndSet(false, true)) { + // Only log the following message once to avoid duplicated annoying logs. + logError("Dropping SparkListenerEvent because no remaining room in event queue. " + + "This likely means one of the SparkListeners is too slow and cannot keep up with " + + "the rate at which tasks are being started by the scheduler.") } - listenerThread.start() - started = true } - def post(event: SparkListenerEvent) { - val eventAdded = eventQueue.offer(event) - if (eventAdded) { - eventLock.release() - } else { - logQueueFullErrorMessage() - } - } - - /** - * For testing only. Wait until there are no more events in the queue, or until the specified - * time has elapsed. Return true if the queue has emptied and false is the specified time - * elapsed before the queue emptied. - */ - def waitUntilEmpty(timeoutMillis: Int): Boolean = { - val finishTime = System.currentTimeMillis + timeoutMillis - while (!queueIsEmpty) { - if (System.currentTimeMillis > finishTime) { - return false - } - /* Sleep rather than using wait/notify, because this is used only for testing and - * wait/notify add overhead in the general case. */ - Thread.sleep(10) - } - true - } - - /** - * For testing only. Return whether the listener daemon thread is still alive. - */ - def listenerThreadIsAlive: Boolean = synchronized { listenerThread.isAlive } - - /** - * Return whether the event queue is empty. - * - * The use of synchronized here guarantees that all events that once belonged to this queue - * have already been processed by all attached listeners, if this returns true. - */ - def queueIsEmpty: Boolean = synchronized { eventQueue.isEmpty } - - /** - * Log an error message to indicate that the event queue is full. Do this only once. - */ - private def logQueueFullErrorMessage(): Unit = { - if (!queueFullErrorMessageLogged) { - if (listenerThread.isAlive) { - logError("Dropping SparkListenerEvent because no remaining room in event queue. 
" + - "This likely means one of the SparkListeners is too slow and cannot keep up with" + - "the rate at which tasks are being started by the scheduler.") - } else { - logError("SparkListenerBus thread is dead! This means SparkListenerEvents have not" + - "been (and will no longer be) propagated to listeners for some time.") - } - queueFullErrorMessageLogged = true - } - } - - def stop() { - if (!started) { - throw new IllegalStateException("Attempted to stop a listener bus that has not yet started!") - } - post(SparkListenerShutdown) - listenerThread.join() - } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index e25096ea92d70..1efce124c0a6b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -19,7 +19,10 @@ package org.apache.spark.scheduler import java.io.{Externalizable, ObjectInput, ObjectOutput} +import org.roaringbitmap.RoaringBitmap + import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.Utils /** * Result returned by a ShuffleMapTask to a scheduler. Includes the block manager address that the @@ -29,7 +32,12 @@ private[spark] sealed trait MapStatus { /** Location where this task was run. */ def location: BlockManagerId - /** Estimated size for the reduce block, in bytes. */ + /** + * Estimated size for the reduce block, in bytes. + * + * If a block is non-empty, then this method MUST return a non-zero size. This invariant is + * necessary for correctness, since block fetchers are allowed to skip zero-size blocks. + */ def getSizeForBlock(reduceId: Int): Long } @@ -38,7 +46,7 @@ private[spark] object MapStatus { def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): MapStatus = { if (uncompressedSizes.length > 2000) { - new HighlyCompressedMapStatus(loc, uncompressedSizes) + HighlyCompressedMapStatus(loc, uncompressedSizes) } else { new CompressedMapStatus(loc, uncompressedSizes) } @@ -98,13 +106,13 @@ private[spark] class CompressedMapStatus( MapStatus.decompressSize(compressedSizes(reduceId)) } - override def writeExternal(out: ObjectOutput): Unit = { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { loc.writeExternal(out) out.writeInt(compressedSizes.length) out.write(compressedSizes) } - override def readExternal(in: ObjectInput): Unit = { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { loc = BlockManagerId(in) val len = in.readInt() compressedSizes = new Array[Byte](len) @@ -112,35 +120,80 @@ private[spark] class CompressedMapStatus( } } - /** - * A [[MapStatus]] implementation that only stores the average size of the blocks. + * A [[MapStatus]] implementation that only stores the average size of non-empty blocks, + * plus a bitmap for tracking which blocks are empty. During serialization, this bitmap + * is compressed. * - * @param loc location where the task is being executed. 
- * @param avgSize average size of all the blocks + * @param loc location where the task is being executed + * @param numNonEmptyBlocks the number of non-empty blocks + * @param emptyBlocks a bitmap tracking which blocks are empty + * @param avgSize average size of the non-empty blocks */ -private[spark] class HighlyCompressedMapStatus( +private[spark] class HighlyCompressedMapStatus private ( private[this] var loc: BlockManagerId, + private[this] var numNonEmptyBlocks: Int, + private[this] var emptyBlocks: RoaringBitmap, private[this] var avgSize: Long) extends MapStatus with Externalizable { - def this(loc: BlockManagerId, uncompressedSizes: Array[Long]) { - this(loc, uncompressedSizes.sum / uncompressedSizes.length) - } + // loc could be null when the default constructor is called during deserialization + require(loc == null || avgSize > 0 || numNonEmptyBlocks == 0, + "Average size can only be zero for map stages that produced no output") - protected def this() = this(null, 0L) // For deserialization only + protected def this() = this(null, -1, null, -1) // For deserialization only override def location: BlockManagerId = loc - override def getSizeForBlock(reduceId: Int): Long = avgSize + override def getSizeForBlock(reduceId: Int): Long = { + if (emptyBlocks.contains(reduceId)) { + 0 + } else { + avgSize + } + } - override def writeExternal(out: ObjectOutput): Unit = { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { loc.writeExternal(out) + emptyBlocks.writeExternal(out) out.writeLong(avgSize) } - override def readExternal(in: ObjectInput): Unit = { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { loc = BlockManagerId(in) + emptyBlocks = new RoaringBitmap() + emptyBlocks.readExternal(in) avgSize = in.readLong() } } + +private[spark] object HighlyCompressedMapStatus { + def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): HighlyCompressedMapStatus = { + // We must keep track of which blocks are empty so that we don't report a zero-sized + // block as being non-empty (or vice-versa) when using the average block size. + var i = 0 + var numNonEmptyBlocks: Int = 0 + var totalSize: Long = 0 + // From a compression standpoint, it shouldn't matter whether we track empty or non-empty + // blocks. From a performance standpoint, we benefit from tracking empty blocks because + // we expect that there will be far fewer of them, so we will perform fewer bitmap insertions. 
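// A worked example of this encoding (block sizes hypothetical): for
// uncompressedSizes = Array(0L, 100L, 0L, 300L) the loop below produces
//   emptyBlocks       = {0, 2}   (reducers whose blocks are zero-sized)
//   numNonEmptyBlocks = 2
//   avgSize           = (100 + 300) / 2 = 200
// so getSizeForBlock() returns 0 for reducers 0 and 2 and the 200-byte average for
// reducers 1 and 3, preserving the invariant that a non-empty block is never reported
// as zero-sized.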
+ val emptyBlocks = new RoaringBitmap() + val totalNumBlocks = uncompressedSizes.length + while (i < totalNumBlocks) { + var size = uncompressedSizes(i) + if (size > 0) { + numNonEmptyBlocks += 1 + totalSize += size + } else { + emptyBlocks.add(i) + } + i += 1 + } + val avgSize = if (numNonEmptyBlocks > 0) { + totalSize / numNonEmptyBlocks + } else { + 0 + } + new HighlyCompressedMapStatus(loc, numNonEmptyBlocks, emptyBlocks, avgSize) + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala index f89724d4ea196..584f4e7789d1a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala @@ -17,74 +17,45 @@ package org.apache.spark.scheduler -import java.io.{BufferedInputStream, InputStream} +import java.io.{InputStream, IOException} import scala.io.Source -import org.apache.hadoop.fs.{Path, FileSystem} import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging -import org.apache.spark.io.CompressionCodec import org.apache.spark.util.JsonProtocol /** - * A SparkListenerBus that replays logged events from persisted storage. - * - * This assumes the given paths are valid log files, where each line can be deserialized into - * exactly one SparkListenerEvent. + * A SparkListenerBus that can be used to replay events from serialized event data. */ -private[spark] class ReplayListenerBus( - logPaths: Seq[Path], - fileSystem: FileSystem, - compressionCodec: Option[CompressionCodec]) - extends SparkListenerBus with Logging { - - private var replayed = false - - if (logPaths.length == 0) { - logWarning("Log path provided contains no log files.") - } +private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { /** - * Replay each event in the order maintained in the given logs. - * This should only be called exactly once. + * Replay each event in the order maintained in the given stream. The stream is expected to + * contain one JSON-encoded SparkListenerEvent per line. + * + * This method can be called multiple times, but the listener behavior is undefined after any + * error is thrown by this method. + * + * @param logData Stream containing event log data. + * @param version Spark version that generated the events. 
*/ - def replay() { - assert(!replayed, "ReplayListenerBus cannot replay events more than once") - logPaths.foreach { path => - // Keep track of input streams at all levels to close them later - // This is necessary because an exception can occur in between stream initializations - var fileStream: Option[InputStream] = None - var bufferedStream: Option[InputStream] = None - var compressStream: Option[InputStream] = None - var currentLine = "" - try { - fileStream = Some(fileSystem.open(path)) - bufferedStream = Some(new BufferedInputStream(fileStream.get)) - compressStream = Some(wrapForCompression(bufferedStream.get)) - - // Parse each line as an event and post the event to all attached listeners - val lines = Source.fromInputStream(compressStream.get).getLines() - lines.foreach { line => - currentLine = line - postToAll(JsonProtocol.sparkEventFromJson(parse(line))) - } - } catch { - case e: Exception => - logError("Exception in parsing Spark event log %s".format(path), e) - logError("Malformed line: %s\n".format(currentLine)) - } finally { - fileStream.foreach(_.close()) - bufferedStream.foreach(_.close()) - compressStream.foreach(_.close()) + def replay(logData: InputStream, version: String) { + var currentLine: String = null + try { + val lines = Source.fromInputStream(logData).getLines() + lines.foreach { line => + currentLine = line + postToAll(JsonProtocol.sparkEventFromJson(parse(line))) } + } catch { + case ioe: IOException => + throw ioe + case e: Exception => + logError("Exception in parsing Spark event log.", e) + logError("Malformed line: %s\n".format(currentLine)) } - replayed = true } - /** If a compression codec is specified, wrap the given stream in a compression stream. */ - private def wrapForCompression(stream: InputStream): InputStream = { - compressionCodec.map(_.compressedInputStream(stream)).getOrElse(stream) - } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 86afe3bd5265f..dd28ddb31de1f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -25,6 +25,7 @@ import scala.collection.mutable import org.apache.spark.{Logging, TaskEndReason} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics +import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.storage.BlockManagerId import org.apache.spark.util.{Distribution, Utils} @@ -56,11 +57,23 @@ case class SparkListenerTaskEnd( extends SparkListenerEvent @DeveloperApi -case class SparkListenerJobStart(jobId: Int, stageIds: Seq[Int], properties: Properties = null) - extends SparkListenerEvent +case class SparkListenerJobStart( + jobId: Int, + time: Long, + stageInfos: Seq[StageInfo], + properties: Properties = null) + extends SparkListenerEvent { + // Note: this is here for backwards-compatibility with older versions of this event which + // only stored stageIds and not StageInfos: + val stageIds: Seq[Int] = stageInfos.map(_.stageId) +} @DeveloperApi -case class SparkListenerJobEnd(jobId: Int, jobResult: JobResult) extends SparkListenerEvent +case class SparkListenerJobEnd( + jobId: Int, + time: Long, + jobResult: JobResult) + extends SparkListenerEvent @DeveloperApi case class SparkListenerEnvironmentUpdate(environmentDetails: Map[String, Seq[(String, String)]]) @@ -77,6 +90,14 @@ case class SparkListenerBlockManagerRemoved(time: Long, blockManagerId: 
BlockMan @DeveloperApi case class SparkListenerUnpersistRDD(rddId: Int) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerExecutorAdded(time: Long, executorId: String, executorInfo: ExecutorInfo) + extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerExecutorRemoved(time: Long, executorId: String, reason: String) + extends SparkListenerEvent + /** * Periodic updates from executors. * @param execId executor id @@ -95,14 +116,12 @@ case class SparkListenerApplicationStart(appName: String, appId: Option[String], @DeveloperApi case class SparkListenerApplicationEnd(time: Long) extends SparkListenerEvent -/** An event used in the listener to shutdown the listener daemon thread. */ -private[spark] case object SparkListenerShutdown extends SparkListenerEvent - /** * :: DeveloperApi :: * Interface for listening to events from the Spark scheduler. Note that this is an internal - * interface which might change in different Spark releases. + * interface which might change in different Spark releases. Java clients should extend + * {@link JavaSparkListener} */ @DeveloperApi trait SparkListener { @@ -176,6 +195,16 @@ trait SparkListener { * Called when the driver receives task metrics from an executor in a heartbeat. */ def onExecutorMetricsUpdate(executorMetricsUpdate: SparkListenerExecutorMetricsUpdate) { } + + /** + * Called when the driver registers a new executor. + */ + def onExecutorAdded(executorAdded: SparkListenerExecutorAdded) { } + + /** + * Called when the driver removes an executor. + */ + def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved) { } } /** diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala index e79ffd7a3587d..fe8a19a2c0cb9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -17,74 +17,47 @@ package org.apache.spark.scheduler -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.ListenerBus /** - * A SparkListenerEvent bus that relays events to its listeners + * A [[SparkListenerEvent]] bus that relays [[SparkListenerEvent]]s to its listeners */ -private[spark] trait SparkListenerBus extends Logging { - - // SparkListeners attached to this event bus - protected val sparkListeners = new ArrayBuffer[SparkListener] - with mutable.SynchronizedBuffer[SparkListener] - - def addListener(listener: SparkListener) { - sparkListeners += listener - } +private[spark] trait SparkListenerBus extends ListenerBus[SparkListener, SparkListenerEvent] { - /** - * Post an event to all attached listeners. - * This does nothing if the event is SparkListenerShutdown. 
- */ - def postToAll(event: SparkListenerEvent) { + override def onPostEvent(listener: SparkListener, event: SparkListenerEvent): Unit = { event match { case stageSubmitted: SparkListenerStageSubmitted => - foreachListener(_.onStageSubmitted(stageSubmitted)) + listener.onStageSubmitted(stageSubmitted) case stageCompleted: SparkListenerStageCompleted => - foreachListener(_.onStageCompleted(stageCompleted)) + listener.onStageCompleted(stageCompleted) case jobStart: SparkListenerJobStart => - foreachListener(_.onJobStart(jobStart)) + listener.onJobStart(jobStart) case jobEnd: SparkListenerJobEnd => - foreachListener(_.onJobEnd(jobEnd)) + listener.onJobEnd(jobEnd) case taskStart: SparkListenerTaskStart => - foreachListener(_.onTaskStart(taskStart)) + listener.onTaskStart(taskStart) case taskGettingResult: SparkListenerTaskGettingResult => - foreachListener(_.onTaskGettingResult(taskGettingResult)) + listener.onTaskGettingResult(taskGettingResult) case taskEnd: SparkListenerTaskEnd => - foreachListener(_.onTaskEnd(taskEnd)) + listener.onTaskEnd(taskEnd) case environmentUpdate: SparkListenerEnvironmentUpdate => - foreachListener(_.onEnvironmentUpdate(environmentUpdate)) + listener.onEnvironmentUpdate(environmentUpdate) case blockManagerAdded: SparkListenerBlockManagerAdded => - foreachListener(_.onBlockManagerAdded(blockManagerAdded)) + listener.onBlockManagerAdded(blockManagerAdded) case blockManagerRemoved: SparkListenerBlockManagerRemoved => - foreachListener(_.onBlockManagerRemoved(blockManagerRemoved)) + listener.onBlockManagerRemoved(blockManagerRemoved) case unpersistRDD: SparkListenerUnpersistRDD => - foreachListener(_.onUnpersistRDD(unpersistRDD)) + listener.onUnpersistRDD(unpersistRDD) case applicationStart: SparkListenerApplicationStart => - foreachListener(_.onApplicationStart(applicationStart)) + listener.onApplicationStart(applicationStart) case applicationEnd: SparkListenerApplicationEnd => - foreachListener(_.onApplicationEnd(applicationEnd)) + listener.onApplicationEnd(applicationEnd) case metricsUpdate: SparkListenerExecutorMetricsUpdate => - foreachListener(_.onExecutorMetricsUpdate(metricsUpdate)) - case SparkListenerShutdown => - } - } - - /** - * Apply the given function to all attached listeners, catching and logging any exception. - */ - private def foreachListener(f: SparkListener => Unit): Unit = { - sparkListeners.foreach { listener => - try { - f(listener) - } catch { - case e: Exception => - logError(s"Listener ${Utils.getFormattedClassName(listener)} threw an exception", e) - } + listener.onExecutorMetricsUpdate(metricsUpdate) + case executorAdded: SparkListenerExecutorAdded => + listener.onExecutorAdded(executorAdded) + case executorRemoved: SparkListenerExecutorRemoved => + listener.onExecutorRemoved(executorRemoved) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 071568cdfb429..cc13f57a49b89 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -102,6 +102,11 @@ private[spark] class Stage( } } + /** + * Removes all shuffle outputs associated with this executor. Note that this will also remove + * outputs which are served by an external shuffle server (if one exists), as they are still + * registered with this execId. 
+ */ def removeOutputsOnExecutor(execId: String) { var becameUnavailable = false for (partition <- 0 until numPartitions) { @@ -131,4 +136,9 @@ private[spark] class Stage( override def toString = "Stage " + id override def hashCode(): Int = id + + override def equals(other: Any): Boolean = other match { + case stage: Stage => stage != null && stage.id == id + case _ => false + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 2552d03d18d06..847a4912eec13 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -44,10 +44,18 @@ import org.apache.spark.util.Utils */ private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable { - final def run(attemptId: Long): T = { - context = new TaskContextImpl(stageId, partitionId, attemptId, false) + /** + * Called by Executor to run this task. + * + * @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext. + * @param attemptNumber how many times this task has been attempted (0 for the first attempt) + * @return the result of the task + */ + final def run(taskAttemptId: Long, attemptNumber: Int): T = { + context = new TaskContextImpl(stageId = stageId, partitionId = partitionId, + taskAttemptId = taskAttemptId, attemptNumber = attemptNumber, runningLocally = false) TaskContextHelper.setTaskContext(context) - context.taskMetrics.hostname = Utils.localHostName() + context.taskMetrics.setHostname(Utils.localHostName()) taskThread = Thread.currentThread() if (_killed) { kill(interruptThread = false) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala index 4c96b9e5fef60..1c7c81c488c3a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala @@ -27,6 +27,7 @@ import org.apache.spark.util.SerializableBuffer */ private[spark] class TaskDescription( val taskId: Long, + val attemptNumber: Int, val executorId: String, val name: String, val index: Int, // Index within this task's TaskSet diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index d49d8fb887007..1f114a0207f7b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -31,8 +31,8 @@ import org.apache.spark.util.Utils private[spark] sealed trait TaskResult[T] /** A reference to a DirectTaskResult that has been stored in the worker's BlockManager. */ -private[spark] -case class IndirectTaskResult[T](blockId: BlockId) extends TaskResult[T] with Serializable +private[spark] case class IndirectTaskResult[T](blockId: BlockId, size: Int) + extends TaskResult[T] with Serializable /** A TaskResult that contains the task's return value and accumulator updates. 
*/ private[spark] @@ -42,7 +42,7 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long def this() = this(null.asInstanceOf[ByteBuffer], null, null) - override def writeExternal(out: ObjectOutput) { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { out.writeInt(valueBytes.remaining); Utils.writeByteBuffer(valueBytes, out) @@ -55,7 +55,7 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long out.writeObject(metrics) } - override def readExternal(in: ObjectInput) { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { val blen = in.readInt() val byteVal = new Array[Byte](blen) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 3f345ceeaaf7a..774f3d8cdb275 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -19,6 +19,7 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer +import scala.language.existentials import scala.util.control.NonFatal import org.apache.spark._ @@ -47,9 +48,18 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul getTaskResultExecutor.execute(new Runnable { override def run(): Unit = Utils.logUncaughtExceptions { try { - val result = serializer.get().deserialize[TaskResult[_]](serializedData) match { - case directResult: DirectTaskResult[_] => directResult - case IndirectTaskResult(blockId) => + val (result, size) = serializer.get().deserialize[TaskResult[_]](serializedData) match { + case directResult: DirectTaskResult[_] => + if (!taskSetManager.canFetchMoreResults(serializedData.limit())) { + return + } + (directResult, serializedData.limit()) + case IndirectTaskResult(blockId, size) => + if (!taskSetManager.canFetchMoreResults(size)) { + // dropped by executor if size is larger than maxResultSize + sparkEnv.blockManager.master.removeBlock(blockId) + return + } logDebug("Fetching indirect task result for TID %s".format(tid)) scheduler.handleTaskGettingResult(taskSetManager, tid) val serializedTaskResult = sparkEnv.blockManager.getRemoteBytes(blockId) @@ -64,9 +74,10 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul val deserializedResult = serializer.get().deserialize[DirectTaskResult[_]]( serializedTaskResult.get) sparkEnv.blockManager.master.removeBlock(blockId) - deserializedResult + (deserializedResult, size) } - result.metrics.resultSize = serializedData.limit() + + result.metrics.setResultSize(size) scheduler.handleSuccessfulTask(taskSetManager, tid, result) } catch { case cnf: ClassNotFoundException => @@ -93,7 +104,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul } } catch { case cnd: ClassNotFoundException => - // Log an error but keep going here -- the task failed, so not catastropic if we can't + // Log an error but keep going here -- the task failed, so not catastrophic if we can't // deserialize the reason. 
val loader = Utils.getContextOrSparkClassLoader logError( diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala index a129a434c9a1a..f095915352b17 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala @@ -23,7 +23,7 @@ import org.apache.spark.storage.BlockManagerId /** * Low-level task scheduler interface, currently implemented exclusively by TaskSchedulerImpl. - * This interface allows plugging in different task schedulers. Each TaskScheduler schedulers tasks + * This interface allows plugging in different task schedulers. Each TaskScheduler schedules tasks * for a single SparkContext. These schedulers get sets of tasks submitted to them from the * DAGScheduler for each stage, and are responsible for sending the tasks to the cluster, running * them, retrying if there are failures, and mitigating stragglers. They return events to the @@ -41,7 +41,7 @@ private[spark] trait TaskScheduler { // Invoked after system has successfully initialized (typically in spark context). // Yarn uses this to bootstrap allocation of resources based on preferred locations, - // wait for slave registerations, etc. + // wait for slave registrations, etc. def postStartHook() { } // Disconnect from the cluster. diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 2b39c7fc872da..79f84e70df9d5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -31,10 +31,10 @@ import scala.util.Random import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.scheduler.SchedulingMode.SchedulingMode +import org.apache.spark.scheduler.TaskLocality.TaskLocality import org.apache.spark.util.Utils import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.BlockManagerId -import akka.actor.Props /** * Schedules tasks for multiple types of clusters by acting through a SchedulerBackend. 
@@ -168,7 +168,7 @@ private[spark] class TaskSchedulerImpl( if (!hasLaunchedTask) { logWarning("Initial job has not accepted any resources; " + "check your cluster UI to ensure that workers are registered " + - "and have sufficient memory") + "and have sufficient resources") } else { this.cancel() } @@ -210,6 +210,40 @@ private[spark] class TaskSchedulerImpl( .format(manager.taskSet.id, manager.parent.name)) } + private def resourceOfferSingleTaskSet( + taskSet: TaskSetManager, + maxLocality: TaskLocality, + shuffledOffers: Seq[WorkerOffer], + availableCpus: Array[Int], + tasks: Seq[ArrayBuffer[TaskDescription]]) : Boolean = { + var launchedTask = false + for (i <- 0 until shuffledOffers.size) { + val execId = shuffledOffers(i).executorId + val host = shuffledOffers(i).host + if (availableCpus(i) >= CPUS_PER_TASK) { + try { + for (task <- taskSet.resourceOffer(execId, host, maxLocality)) { + tasks(i) += task + val tid = task.taskId + taskIdToTaskSetId(tid) = taskSet.taskSet.id + taskIdToExecutorId(tid) = execId + executorsByHost(host) += execId + availableCpus(i) -= CPUS_PER_TASK + assert(availableCpus(i) >= 0) + launchedTask = true + } + } catch { + case e: TaskNotSerializableException => + logError(s"Resource offer failed, task set ${taskSet.name} was not serializable") + // Do not offer resources for this task, but don't throw an error to allow other + // task sets to be submitted. + return launchedTask + } + } + } + return launchedTask + } + /** * Called by cluster manager to offer resources on slaves. We respond by asking our active task * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so @@ -252,23 +286,8 @@ private[spark] class TaskSchedulerImpl( var launchedTask = false for (taskSet <- sortedTaskSets; maxLocality <- taskSet.myLocalityLevels) { do { - launchedTask = false - for (i <- 0 until shuffledOffers.size) { - val execId = shuffledOffers(i).executorId - val host = shuffledOffers(i).host - if (availableCpus(i) >= CPUS_PER_TASK) { - for (task <- taskSet.resourceOffer(execId, host, maxLocality)) { - tasks(i) += task - val tid = task.taskId - taskIdToTaskSetId(tid) = taskSet.taskSet.id - taskIdToExecutorId(tid) = execId - executorsByHost(host) += execId - availableCpus(i) -= CPUS_PER_TASK - assert(availableCpus(i) >= 0) - launchedTask = true - } - } - } + launchedTask = resourceOfferSingleTaskSet( + taskSet, maxLocality, shuffledOffers, availableCpus, tasks) } while (launchedTask) } @@ -342,7 +361,7 @@ private[spark] class TaskSchedulerImpl( dagScheduler.executorHeartbeatReceived(execId, metricsWithStageIds, blockManagerId) } - def handleTaskGettingResult(taskSetManager: TaskSetManager, tid: Long) { + def handleTaskGettingResult(taskSetManager: TaskSetManager, tid: Long): Unit = synchronized { taskSetManager.handleTaskGettingResult(tid) } @@ -395,9 +414,6 @@ private[spark] class TaskSchedulerImpl( taskResultGetter.stop() } starvationTimer.cancel() - - // sleeping for an arbitrary 1 seconds to ensure that messages are sent out. 
- Thread.sleep(1000L) } override def defaultParallelism() = backend.defaultParallelism() diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index a6c23fc85a1b0..55024ecd55e61 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -18,18 +18,19 @@ package org.apache.spark.scheduler import java.io.NotSerializableException +import java.nio.ByteBuffer import java.util.Arrays import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet -import scala.math.max -import scala.math.min +import scala.math.{min, max} +import scala.util.control.NonFatal import org.apache.spark._ -import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.TaskMetrics -import org.apache.spark.util.{Clock, SystemClock} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.util.{Clock, SystemClock, Utils} /** * Schedules the tasks within a single TaskSet in the TaskSchedulerImpl. This class keeps track of @@ -68,6 +69,9 @@ private[spark] class TaskSetManager( val SPECULATION_QUANTILE = conf.getDouble("spark.speculation.quantile", 0.75) val SPECULATION_MULTIPLIER = conf.getDouble("spark.speculation.multiplier", 1.5) + // Limit of bytes for total size of results (default is 1GB) + val maxResultSize = Utils.getMaxResultSize(conf) + // Serializer for closures and tasks. val env = SparkEnv.get val ser = env.closureSerializer.newInstance() @@ -89,6 +93,8 @@ private[spark] class TaskSetManager( var stageId = taskSet.stageId var name = "TaskSet_" + taskSet.stageId.toString var parent: Pool = null + var totalResultSize = 0L + var calculatedTasks = 0 val runningTasksSet = new HashSet[Long] override def runningTasks = runningTasksSet.size @@ -245,7 +251,7 @@ private[spark] class TaskSetManager( * This method also cleans up any tasks in the list that have already * been launched, since we want that to happen lazily. */ - private def findTaskFromList(execId: String, list: ArrayBuffer[Int]): Option[Int] = { + private def dequeueTaskFromList(execId: String, list: ArrayBuffer[Int]): Option[Int] = { var indexOffset = list.size while (indexOffset > 0) { indexOffset -= 1 @@ -286,7 +292,7 @@ private[spark] class TaskSetManager( * an attempt running on this host, in case the host is slow. In addition, the task should meet * the given locality constraint. */ - private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value) + private def dequeueSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value) : Option[(Int, TaskLocality.Value)] = { speculatableTasks.retain(index => !successful(index)) // Remove finished tasks from set @@ -362,22 +368,22 @@ private[spark] class TaskSetManager( * * @return An option containing (task index within the task set, locality, is speculative?) 
*/ - private def findTask(execId: String, host: String, maxLocality: TaskLocality.Value) + private def dequeueTask(execId: String, host: String, maxLocality: TaskLocality.Value) : Option[(Int, TaskLocality.Value, Boolean)] = { - for (index <- findTaskFromList(execId, getPendingTasksForExecutor(execId))) { + for (index <- dequeueTaskFromList(execId, getPendingTasksForExecutor(execId))) { return Some((index, TaskLocality.PROCESS_LOCAL, false)) } if (TaskLocality.isAllowed(maxLocality, TaskLocality.NODE_LOCAL)) { - for (index <- findTaskFromList(execId, getPendingTasksForHost(host))) { + for (index <- dequeueTaskFromList(execId, getPendingTasksForHost(host))) { return Some((index, TaskLocality.NODE_LOCAL, false)) } } if (TaskLocality.isAllowed(maxLocality, TaskLocality.NO_PREF)) { // Look for noPref tasks after NODE_LOCAL for minimize cross-rack traffic - for (index <- findTaskFromList(execId, pendingTasksWithNoPrefs)) { + for (index <- dequeueTaskFromList(execId, pendingTasksWithNoPrefs)) { return Some((index, TaskLocality.PROCESS_LOCAL, false)) } } @@ -385,20 +391,20 @@ private[spark] class TaskSetManager( if (TaskLocality.isAllowed(maxLocality, TaskLocality.RACK_LOCAL)) { for { rack <- sched.getRackForHost(host) - index <- findTaskFromList(execId, getPendingTasksForRack(rack)) + index <- dequeueTaskFromList(execId, getPendingTasksForRack(rack)) } { return Some((index, TaskLocality.RACK_LOCAL, false)) } } if (TaskLocality.isAllowed(maxLocality, TaskLocality.ANY)) { - for (index <- findTaskFromList(execId, allPendingTasks)) { + for (index <- dequeueTaskFromList(execId, allPendingTasks)) { return Some((index, TaskLocality.ANY, false)) } } // find a speculative task if all others tasks have been scheduled - findSpeculativeTask(execId, host, maxLocality).map { + dequeueSpeculativeTask(execId, host, maxLocality).map { case (taskIndex, allowedLocality) => (taskIndex, allowedLocality, true)} } @@ -413,6 +419,7 @@ private[spark] class TaskSetManager( * @param host the host Id of the offered resource * @param maxLocality the maximum locality we want to schedule the tasks at */ + @throws[TaskNotSerializableException] def resourceOffer( execId: String, host: String, @@ -432,7 +439,7 @@ private[spark] class TaskSetManager( } } - findTask(execId, host, allowedLocality) match { + dequeueTask(execId, host, allowedLocality) match { case Some((index, taskLocality, speculative)) => { // Found a task; do some bookkeeping and return a task description val task = tasks(index) @@ -452,10 +459,17 @@ private[spark] class TaskSetManager( } // Serialize and return the task val startTime = clock.getTime() - // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here - // we assume the task can be serialized without exceptions. - val serializedTask = Task.serializeWithDependencies( - task, sched.sc.addedFiles, sched.sc.addedJars, ser) + val serializedTask: ByteBuffer = try { + Task.serializeWithDependencies(task, sched.sc.addedFiles, sched.sc.addedJars, ser) + } catch { + // If the task cannot be serialized, then there's no point to re-attempt the task, + // as it will always fail. So just abort the whole task-set. + case NonFatal(e) => + val msg = s"Failed to serialize task $taskId, not attempting to retry it." 
+ logError(msg, e) + abort(s"$msg Exception during serialization: $e") + throw new TaskNotSerializableException(e) + } if (serializedTask.limit > TaskSetManager.TASK_SIZE_TO_WARN_KB * 1024 && !emittedTaskSizeWarning) { emittedTaskSizeWarning = true @@ -473,7 +487,8 @@ private[spark] class TaskSetManager( taskName, taskId, host, taskLocality, serializedTask.limit)) sched.dagScheduler.taskStarted(task, info) - return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) + return Some(new TaskDescription(taskId = taskId, attemptNumber = attemptNum, execId, + taskName, index, serializedTask)) } case _ => } @@ -491,13 +506,64 @@ private[spark] class TaskSetManager( * Get the level we can launch tasks according to delay scheduling, based on current wait time. */ private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { - while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && - currentLocalityIndex < myLocalityLevels.length - 1) - { - // Jump to the next locality level, and remove our waiting time for the current one since - // we don't want to count it again on the next one - lastLaunchTime += localityWaits(currentLocalityIndex) - currentLocalityIndex += 1 + // Remove the scheduled or finished tasks lazily + def tasksNeedToBeScheduledFrom(pendingTaskIds: ArrayBuffer[Int]): Boolean = { + var indexOffset = pendingTaskIds.size + while (indexOffset > 0) { + indexOffset -= 1 + val index = pendingTaskIds(indexOffset) + if (copiesRunning(index) == 0 && !successful(index)) { + return true + } else { + pendingTaskIds.remove(indexOffset) + } + } + false + } + // Walk through the list of tasks that can be scheduled at each location and returns true + // if there are any tasks that still need to be scheduled. Lazily cleans up tasks that have + // already been scheduled. + def moreTasksToRunIn(pendingTasks: HashMap[String, ArrayBuffer[Int]]): Boolean = { + val emptyKeys = new ArrayBuffer[String] + val hasTasks = pendingTasks.exists { + case (id: String, tasks: ArrayBuffer[Int]) => + if (tasksNeedToBeScheduledFrom(tasks)) { + true + } else { + emptyKeys += id + false + } + } + // The key could be executorId, host or rackId + emptyKeys.foreach(id => pendingTasks.remove(id)) + hasTasks + } + + while (currentLocalityIndex < myLocalityLevels.length - 1) { + val moreTasks = myLocalityLevels(currentLocalityIndex) match { + case TaskLocality.PROCESS_LOCAL => moreTasksToRunIn(pendingTasksForExecutor) + case TaskLocality.NODE_LOCAL => moreTasksToRunIn(pendingTasksForHost) + case TaskLocality.NO_PREF => pendingTasksWithNoPrefs.nonEmpty + case TaskLocality.RACK_LOCAL => moreTasksToRunIn(pendingTasksForRack) + } + if (!moreTasks) { + // This is a performance optimization: if there are no more tasks that can + // be scheduled at a particular locality level, there is no point in waiting + // for the locality wait timeout (SPARK-4939). 
+ lastLaunchTime = curTime + logDebug(s"No tasks for locality level ${myLocalityLevels(currentLocalityIndex)}, " + + s"so moving to locality level ${myLocalityLevels(currentLocalityIndex + 1)}") + currentLocalityIndex += 1 + } else if (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex)) { + // Jump to the next locality level, and reset lastLaunchTime so that the next locality + // wait timer doesn't immediately expire + lastLaunchTime += localityWaits(currentLocalityIndex) + currentLocalityIndex += 1 + logDebug(s"Moving to ${myLocalityLevels(currentLocalityIndex)} after waiting for " + + s"${localityWaits(currentLocalityIndex)}ms") + } else { + return myLocalityLevels(currentLocalityIndex) + } } myLocalityLevels(currentLocalityIndex) } @@ -515,12 +581,33 @@ private[spark] class TaskSetManager( index } + /** + * Marks the task as getting result and notifies the DAG Scheduler + */ def handleTaskGettingResult(tid: Long) = { val info = taskInfos(tid) info.markGettingResult() sched.dagScheduler.taskGettingResult(info) } + /** + * Check whether has enough quota to fetch the result with `size` bytes + */ + def canFetchMoreResults(size: Long): Boolean = sched.synchronized { + totalResultSize += size + calculatedTasks += 1 + if (maxResultSize > 0 && totalResultSize > maxResultSize) { + val msg = s"Total size of serialized results of ${calculatedTasks} tasks " + + s"(${Utils.bytesToString(totalResultSize)}) is bigger than spark.driver.maxResultSize " + + s"(${Utils.bytesToString(maxResultSize)})" + logError(msg) + abort(msg) + false + } else { + true + } + } + /** * Marks the task as successful and notifies the DAGScheduler that a task has ended. */ @@ -635,7 +722,7 @@ private[spark] class TaskSetManager( maybeFinishTaskSet() } - def abort(message: String) { + def abort(message: String): Unit = sched.synchronized { // TODO: Kill running tasks if we were not terminated due to a Mesos error sched.dagScheduler.taskSetFailed(taskSet, message) isZombie = true @@ -679,7 +766,7 @@ private[spark] class TaskSetManager( // Re-enqueue pending tasks for this host based on the status of the cluster. Note // that it's okay if we add a task to the same queue twice (if it had multiple preferred - // locations), because findTaskFromList will skip already-running tasks. + // locations), because dequeueTaskFromList will skip already-running tasks. for (index <- getPendingTasksForExecutor(execId)) { addPendingTask(index, readding=true) } @@ -687,10 +774,11 @@ private[spark] class TaskSetManager( addPendingTask(index, readding=true) } - // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage. + // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage, + // and we are not using an external shuffle server which could serve the shuffle outputs. // The reason is the next stage wouldn't be able to fetch the data from this dead executor // so we would need to rerun these tasks on other executors. 
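The canFetchMoreResults check added above enforces spark.driver.maxResultSize by accumulating the size of every serialized result as it arrives and aborting the task set once the running total exceeds the limit. A minimal, self-contained sketch of that bookkeeping in plain Scala (ResultQuota and the sample sizes are made up for illustration; the real logic lives on TaskSetManager and calls abort()):

class ResultQuota(maxResultSize: Long) {
  private var totalResultSize = 0L
  private var calculatedTasks = 0

  // Mirrors canFetchMoreResults: returns false once the accumulated result size
  // exceeds the configured limit (a limit of 0 disables the check).
  def canFetchMoreResults(size: Long): Boolean = synchronized {
    totalResultSize += size
    calculatedTasks += 1
    if (maxResultSize > 0 && totalResultSize > maxResultSize) {
      println(s"Total size of serialized results of $calculatedTasks tasks " +
        s"($totalResultSize bytes) is bigger than the limit ($maxResultSize bytes)")
      false
    } else {
      true
    }
  }
}

object ResultQuotaDemo extends App {
  val quota = new ResultQuota(maxResultSize = 1L << 30)  // 1 GB, the default mentioned above
  println(quota.canFetchMoreResults(700L << 20))         // true: 700 MB so far
  println(quota.canFetchMoreResults(400L << 20))         // false: 1100 MB exceeds the quota
}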
- if (tasks(0).isInstanceOf[ShuffleMapTask]) { + if (tasks(0).isInstanceOf[ShuffleMapTask] && !env.blockManager.externalShuffleServiceEnabled) { for ((tid, info) <- taskInfos if info.executorId == execId) { val index = taskInfos(tid).index if (successful(index)) { @@ -706,7 +794,7 @@ private[spark] class TaskSetManager( } // Also re-enqueue any tasks that were running on the node for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { - handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure) + handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure(execId)) } // recalculate valid locality levels and waits when executor is lost recomputeLocality() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index fb8160abc59db..1da6fe976da5b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -66,7 +66,19 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: String) extends CoarseGrainedClusterMessage - case class AddWebUIFilter(filterName:String, filterParams: Map[String, String], proxyBase :String) + // Exchanged between the driver and the AM in Yarn client mode + case class AddWebUIFilter(filterName:String, filterParams: Map[String, String], proxyBase: String) extends CoarseGrainedClusterMessage + // Messages exchanged between the driver and the cluster manager for executor allocation + // In Yarn mode, these are exchanged between the driver and the AM + + case object RegisterClusterManager extends CoarseGrainedClusterMessage + + // Request executors by specifying the new total number of executors desired + // This includes executors already pending or running + case class RequestExecutors(requestedTotal: Int) extends CoarseGrainedClusterMessage + + case class KillExecutors(executorIds: Seq[String]) extends CoarseGrainedClusterMessage + } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 59aed6b72fe42..103a5c053c289 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -27,11 +27,10 @@ import akka.actor._ import akka.pattern.ask import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} -import org.apache.spark.{SparkEnv, Logging, SparkException, TaskState} -import org.apache.spark.scheduler.{SchedulerBackend, SlaveLost, TaskDescription, TaskSchedulerImpl, WorkerOffer} +import org.apache.spark.{ExecutorAllocationClient, Logging, SparkEnv, SparkException, TaskState} +import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.util.{ActorLogReceive, SerializableBuffer, AkkaUtils, Utils} -import org.apache.spark.ui.JettyUtils /** * A scheduler backend that waits for coarse grained executors to connect to it through Akka. @@ -42,11 +41,12 @@ import org.apache.spark.ui.JettyUtils * (spark.deploy.*). 
*/ private[spark] -class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: ActorSystem) - extends SchedulerBackend with Logging +class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSystem: ActorSystem) + extends ExecutorAllocationClient with SchedulerBackend with Logging { // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) + // Total number of executors that are currently registered var totalRegisteredExecutors = new AtomicInteger(0) val conf = scheduler.sc.conf private val timeout = AkkaUtils.askTimeout(conf) @@ -61,10 +61,19 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A conf.getInt("spark.scheduler.maxRegisteredResourcesWaitingTime", 30000) val createTime = System.currentTimeMillis() + private val executorDataMap = new HashMap[String, ExecutorData] + + // Number of executors requested from the cluster manager that have not registered yet + private var numPendingExecutors = 0 + + private val listenerBus = scheduler.sc.listenerBus + + // Executors we have requested the cluster manager to kill that have not died yet + private val executorsPendingToRemove = new HashSet[String] + class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor with ActorLogReceive { override protected def log = CoarseGrainedSchedulerBackend.this.log private val addressToExecutorId = new HashMap[Address, String] - private val executorDataMap = new HashMap[String, ExecutorData] override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() @@ -84,12 +93,23 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A } else { logInfo("Registered executor: " + sender + " with ID " + executorId) sender ! RegisteredExecutor - executorDataMap.put(executorId, new ExecutorData(sender, sender.path.address, - Utils.parseHostPort(hostPort)._1, cores, cores)) addressToExecutorId(sender.path.address) = executorId totalCoreCount.addAndGet(cores) totalRegisteredExecutors.addAndGet(1) + val (host, _) = Utils.parseHostPort(hostPort) + val data = new ExecutorData(sender, sender.path.address, host, cores, cores) + // This must be synchronized because variables mutated + // in this block are read when requesting executors + CoarseGrainedSchedulerBackend.this.synchronized { + executorDataMap.put(executorId, data) + if (numPendingExecutors > 0) { + numPendingExecutors -= 1 + logDebug(s"Decremented number of pending executors ($numPendingExecutors left)") + } + } + listenerBus.post( + SparkListenerExecutorAdded(System.currentTimeMillis(), executorId, data)) makeOffers() } @@ -111,7 +131,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A makeOffers() case KillTask(taskId, executorId, interruptThread) => - executorDataMap(executorId).executorActor ! KillTask(taskId, executorId, interruptThread) + executorDataMap.get(executorId) match { + case Some(executorInfo) => + executorInfo.executorActor ! KillTask(taskId, executorId, interruptThread) + case None => + // Ignoring the task kill since the executor is not registered. + logWarning(s"Attempted to kill task $taskId for unknown executor $executorId.") + } case StopDriver => sender ! true @@ -128,10 +154,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A removeExecutor(executorId, reason) sender ! 
true - case AddWebUIFilter(filterName, filterParams, proxyBase) => - addWebUIFilter(filterName, filterParams, proxyBase) - sender ! true - case DisassociatedEvent(_, address, _) => addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client disassociated")) @@ -183,13 +205,21 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A } // Remove a disconnected slave from the cluster - def removeExecutor(executorId: String, reason: String) { + def removeExecutor(executorId: String, reason: String): Unit = { executorDataMap.get(executorId) match { case Some(executorInfo) => - executorDataMap -= executorId + // This must be synchronized because variables mutated + // in this block are read when requesting executors + CoarseGrainedSchedulerBackend.this.synchronized { + executorDataMap -= executorId + executorsPendingToRemove -= executorId + } totalCoreCount.addAndGet(-executorInfo.totalCores) + totalRegisteredExecutors.addAndGet(-1) scheduler.executorLost(executorId, SlaveLost(reason)) - case None => logError(s"Asked to remove non existant executor $executorId") + listenerBus.post( + SparkListenerExecutorRemoved(System.currentTimeMillis(), executorId, reason)) + case None => logError(s"Asked to remove non-existent executor $executorId") } } } @@ -274,21 +304,62 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A false } - // Add filters to the SparkUI - def addWebUIFilter(filterName: String, filterParams: Map[String, String], proxyBase: String) { - if (proxyBase != null && proxyBase.nonEmpty) { - System.setProperty("spark.ui.proxyBase", proxyBase) - } + /** + * Return the number of executors currently registered with this backend. + */ + def numExistingExecutors: Int = executorDataMap.size + + /** + * Request an additional number of executors from the cluster manager. + * Return whether the request is acknowledged. + */ + final override def requestExecutors(numAdditionalExecutors: Int): Boolean = synchronized { + logInfo(s"Requesting $numAdditionalExecutors additional executor(s) from the cluster manager") + logDebug(s"Number of pending executors is now $numPendingExecutors") + numPendingExecutors += numAdditionalExecutors + // Account for executors pending to be added or removed + val newTotal = numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size + doRequestTotalExecutors(newTotal) + } - val hasFilter = (filterName != null && filterName.nonEmpty && - filterParams != null && filterParams.nonEmpty) - if (hasFilter) { - logInfo(s"Add WebUI Filter. $filterName, $filterParams, $proxyBase") - conf.set("spark.ui.filters", filterName) - filterParams.foreach { case (k, v) => conf.set(s"spark.$filterName.param.$k", v) } - scheduler.sc.ui.foreach { ui => JettyUtils.addFilters(ui.getHandlers, conf) } + /** + * Request executors from the cluster manager by specifying the total number desired, + * including existing pending and running executors. + * + * The semantics here guarantee that we do not over-allocate executors for this application, + * since a later request overrides the value of any prior request. The alternative interface + * of requesting a delta of executors risks double counting new executors when there are + * insufficient resources to satisfy the first request. We make the assumption here that the + * cluster manager will eventually fulfill all requests when resources free up. + * + * Return whether the request is acknowledged. 
+ */ + protected def doRequestTotalExecutors(requestedTotal: Int): Boolean = false + + /** + * Request that the cluster manager kill the specified executors. + * Return whether the kill request is acknowledged. + */ + final override def killExecutors(executorIds: Seq[String]): Boolean = synchronized { + logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}") + val filteredExecutorIds = new ArrayBuffer[String] + executorIds.foreach { id => + if (executorDataMap.contains(id)) { + filteredExecutorIds += id + } else { + logWarning(s"Executor to kill $id does not exist!") + } } + executorsPendingToRemove ++= filteredExecutorIds + doKillExecutors(filteredExecutorIds) } + + /** + * Kill the given list of executors through the cluster manager. + * Return whether the kill request is acknowledged. + */ + protected def doKillExecutors(executorIds: Seq[String]): Boolean = false + } private[spark] object CoarseGrainedSchedulerBackend { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala index b71bd5783d6df..eb52ddfb1eab1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala @@ -31,7 +31,7 @@ import akka.actor.{Address, ActorRef} private[cluster] class ExecutorData( val executorActor: ActorRef, val executorAddress: Address, - val executorHost: String , + override val executorHost: String, var freeCores: Int, - val totalCores: Int -) + override val totalCores: Int +) extends ExecutorInfo(executorHost, totalCores) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala new file mode 100644 index 0000000000000..b4738e64c9391 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster + +import org.apache.spark.annotation.DeveloperApi + +/** + * :: DeveloperApi :: + * Stores information about an executor to pass from the scheduler to SparkListeners. 
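The requestExecutors and killExecutors implementations above deliberately talk to the cluster manager in terms of a desired total (existing + pending - pending removals), so a later request simply overrides an earlier one instead of risking double-counted deltas. A toy model of that bookkeeping, with no Akka or Spark classes (AllocationBook and the executor ids are illustrative only):

import scala.collection.mutable

class AllocationBook {
  private val registered      = mutable.HashSet[String]()  // executors currently registered
  private var numPending      = 0                          // requested but not yet registered
  private val pendingToRemove = mutable.HashSet[String]()  // kill requested but not yet dead

  def registerExecutor(id: String): Unit = synchronized {
    registered += id
    if (numPending > 0) numPending -= 1
  }

  // Returns the new desired total that would be sent to the cluster manager.
  def requestExecutors(numAdditional: Int): Int = synchronized {
    numPending += numAdditional
    registered.size + numPending - pendingToRemove.size
  }

  // Only known executors are forwarded to the cluster manager for killing.
  def killExecutors(ids: Seq[String]): Seq[String] = synchronized {
    val known = ids.filter(registered.contains)
    pendingToRemove ++= known
    known
  }
}

object AllocationBookDemo extends App {
  val book = new AllocationBook
  book.registerExecutor("1"); book.registerExecutor("2")
  println(book.requestExecutors(3))      // 5 = 2 running + 3 pending
  println(book.killExecutors(Seq("2")))  // List(2)
  println(book.requestExecutors(1))      // 5 = 2 running + 4 pending - 1 pending removal
}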
+ */ +@DeveloperApi +class ExecutorInfo( + val executorHost: String, + val totalCores: Int +) { + + def canEqual(other: Any): Boolean = other.isInstanceOf[ExecutorInfo] + + override def equals(other: Any): Boolean = other match { + case that: ExecutorInfo => + (that canEqual this) && + executorHost == that.executorHost && + totalCores == that.totalCores + case _ => false + } + + override def hashCode(): Int = { + val state = Seq(executorHost, totalCores) + state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala index ee10aa061f4e9..06786a59524e7 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.{Path, FileSystem} import org.apache.spark.{Logging, SparkContext, SparkEnv} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.util.AkkaUtils private[spark] class SimrSchedulerBackend( scheduler: TaskSchedulerImpl, @@ -38,7 +39,8 @@ private[spark] class SimrSchedulerBackend( override def start() { super.start() - val driverUrl = "akka.tcp://%s@%s:%s/user/%s".format( + val driverUrl = AkkaUtils.address( + AkkaUtils.protocol(actorSystem), SparkEnv.driverActorSystemName, sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port"), diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 8c7de75600b5f..d2e1680a5fd1b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -21,7 +21,7 @@ import org.apache.spark.{Logging, SparkConf, SparkContext, SparkEnv} import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.client.{AppClient, AppClientListener} import org.apache.spark.scheduler.{ExecutorExited, ExecutorLossReason, SlaveLost, TaskSchedulerImpl} -import org.apache.spark.util.Utils +import org.apache.spark.util.{AkkaUtils, Utils} private[spark] class SparkDeploySchedulerBackend( scheduler: TaskSchedulerImpl, @@ -46,7 +46,8 @@ private[spark] class SparkDeploySchedulerBackend( super.start() // The endpoint for executors to talk to us - val driverUrl = "akka.tcp://%s@%s:%s/user/%s".format( + val driverUrl = AkkaUtils.address( + AkkaUtils.protocol(actorSystem), SparkEnv.driverActorSystemName, conf.get("spark.driver.host"), conf.get("spark.driver.port"), @@ -55,19 +56,26 @@ private[spark] class SparkDeploySchedulerBackend( "{{WORKER_URL}}") val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions") .map(Utils.splitCommandString).getOrElse(Seq.empty) - val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath").toSeq.flatMap { cp => - cp.split(java.io.File.pathSeparator) - } - val libraryPathEntries = - sc.conf.getOption("spark.executor.extraLibraryPath").toSeq.flatMap { cp => - cp.split(java.io.File.pathSeparator) + val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath") + .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil) + val libraryPathEntries = 
sc.conf.getOption("spark.executor.extraLibraryPath") + .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil) + + // When testing, expose the parent class path to the child. This is processed by + // compute-classpath.{cmd,sh} and makes all needed jars available to child processes + // when the assembly is built with the "*-provided" profiles enabled. + val testingClassPath = + if (sys.props.contains("spark.testing")) { + sys.props("java.class.path").split(java.io.File.pathSeparator).toSeq + } else { + Nil } // Start executors with a few necessary configs for registering with the scheduler val sparkJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf) val javaOpts = sparkJavaOpts ++ extraJavaOpts val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend", - args, sc.executorEnvs, classPathEntries, libraryPathEntries, javaOpts) + args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts) val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("") val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command, appUIAddress, sc.eventLogDir) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala new file mode 100644 index 0000000000000..f14aaeea0a25c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + +import scala.concurrent.{Future, ExecutionContext} + +import akka.actor.{Actor, ActorRef, Props} +import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ +import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.ui.JettyUtils +import org.apache.spark.util.{AkkaUtils, Utils} + +import scala.util.control.NonFatal + +/** + * Abstract Yarn scheduler backend that contains common logic + * between the client and cluster Yarn scheduler backends. 
+ */ +private[spark] abstract class YarnSchedulerBackend( + scheduler: TaskSchedulerImpl, + sc: SparkContext) + extends CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) { + + if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { + minRegisteredRatio = 0.8 + } + + protected var totalExpectedExecutors = 0 + + private val yarnSchedulerActor: ActorRef = + actorSystem.actorOf( + Props(new YarnSchedulerActor), + name = YarnSchedulerBackend.ACTOR_NAME) + + private implicit val askTimeout = AkkaUtils.askTimeout(sc.conf) + + /** + * Request executors from the ApplicationMaster by specifying the total number desired. + * This includes executors already pending or running. + */ + override def doRequestTotalExecutors(requestedTotal: Int): Boolean = { + AkkaUtils.askWithReply[Boolean]( + RequestExecutors(requestedTotal), yarnSchedulerActor, askTimeout) + } + + /** + * Request that the ApplicationMaster kill the specified executors. + */ + override def doKillExecutors(executorIds: Seq[String]): Boolean = { + AkkaUtils.askWithReply[Boolean]( + KillExecutors(executorIds), yarnSchedulerActor, askTimeout) + } + + override def sufficientResourcesRegistered(): Boolean = { + totalRegisteredExecutors.get() >= totalExpectedExecutors * minRegisteredRatio + } + + /** + * Add filters to the SparkUI. + */ + private def addWebUIFilter( + filterName: String, + filterParams: Map[String, String], + proxyBase: String): Unit = { + if (proxyBase != null && proxyBase.nonEmpty) { + System.setProperty("spark.ui.proxyBase", proxyBase) + } + + val hasFilter = + filterName != null && filterName.nonEmpty && + filterParams != null && filterParams.nonEmpty + if (hasFilter) { + logInfo(s"Add WebUI Filter. $filterName, $filterParams, $proxyBase") + conf.set("spark.ui.filters", filterName) + filterParams.foreach { case (k, v) => conf.set(s"spark.$filterName.param.$k", v) } + scheduler.sc.ui.foreach { ui => JettyUtils.addFilters(ui.getHandlers, conf) } + } + } + + /** + * An actor that communicates with the ApplicationMaster. + */ + private class YarnSchedulerActor extends Actor { + private var amActor: Option[ActorRef] = None + + implicit val askAmActorExecutor = ExecutionContext.fromExecutor( + Utils.newDaemonCachedThreadPool("yarn-scheduler-ask-am-executor")) + + override def preStart(): Unit = { + // Listen for disassociation events + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) + } + + override def receive = { + case RegisterClusterManager => + logInfo(s"ApplicationMaster registered as $sender") + amActor = Some(sender) + + case r: RequestExecutors => + amActor match { + case Some(actor) => + val driverActor = sender + Future { + driverActor ! AkkaUtils.askWithReply[Boolean](r, actor, askTimeout) + } onFailure { + case NonFatal(e) => logError(s"Sending $r to AM was unsuccessful", e) + } + case None => + logWarning("Attempted to request executors before the AM has registered!") + sender ! false + } + + case k: KillExecutors => + amActor match { + case Some(actor) => + val driverActor = sender + Future { + driverActor ! AkkaUtils.askWithReply[Boolean](k, actor, askTimeout) + } onFailure { + case NonFatal(e) => logError(s"Sending $k to AM was unsuccessful", e) + } + case None => + logWarning("Attempted to kill executors before the AM has registered!") + sender ! false + } + + case AddWebUIFilter(filterName, filterParams, proxyBase) => + addWebUIFilter(filterName, filterParams, proxyBase) + sender ! 
true + + case d: DisassociatedEvent => + if (amActor.isDefined && sender == amActor.get) { + logWarning(s"ApplicationMaster has disassociated: $d") + } + } + } +} + +private[spark] object YarnSchedulerBackend { + val ACTOR_NAME = "YarnScheduler" +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index d7f88de4b40aa..0d1c2a916ca7f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -31,6 +31,7 @@ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTas import org.apache.spark.{Logging, SparkContext, SparkEnv, SparkException} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.util.{Utils, AkkaUtils} /** * A SchedulerBackend that runs tasks on Mesos, but uses "coarse-grained" tasks, where it holds @@ -92,7 +93,7 @@ private[spark] class CoarseMesosSchedulerBackend( setDaemon(true) override def run() { val scheduler = CoarseMesosSchedulerBackend.this - val fwInfo = FrameworkInfo.newBuilder().setUser("").setName(sc.appName).build() + val fwInfo = FrameworkInfo.newBuilder().setUser(sc.sparkUser).setName(sc.appName).build() driver = new MesosSchedulerDriver(scheduler, fwInfo, master) try { { val ret = driver.run() @@ -120,16 +121,18 @@ private[spark] class CoarseMesosSchedulerBackend( environment.addVariables( Environment.Variable.newBuilder().setName("SPARK_CLASSPATH").setValue(cp).build()) } - val extraJavaOpts = conf.getOption("spark.executor.extraJavaOptions") + val extraJavaOpts = conf.get("spark.executor.extraJavaOptions", "") - val libraryPathOption = "spark.executor.extraLibraryPath" - val extraLibraryPath = conf.getOption(libraryPathOption).map(p => s"-Djava.library.path=$p") - val extraOpts = Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ") + // Set the environment variable through a command prefix + // to append to the existing value of the variable + val prefixEnv = conf.getOption("spark.executor.extraLibraryPath").map { p => + Utils.libraryPathEnvPrefix(Seq(p)) + }.getOrElse("") environment.addVariables( Environment.Variable.newBuilder() .setName("SPARK_EXECUTOR_OPTS") - .setValue(extraOpts) + .setValue(extraJavaOpts) .build()) sc.executorEnvs.foreach { case (key, value) => @@ -140,7 +143,8 @@ private[spark] class CoarseMesosSchedulerBackend( } val command = CommandInfo.newBuilder() .setEnvironment(environment) - val driverUrl = "akka.tcp://%s@%s:%s/user/%s".format( + val driverUrl = AkkaUtils.address( + AkkaUtils.protocol(sc.env.actorSystem), SparkEnv.driverActorSystemName, conf.get("spark.driver.host"), conf.get("spark.driver.port"), @@ -150,16 +154,17 @@ private[spark] class CoarseMesosSchedulerBackend( if (uri == null) { val runScript = new File(executorSparkHome, "./bin/spark-class").getCanonicalPath command.setValue( - "\"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d %s".format( - runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores, appId)) + "%s \"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d %s".format( + prefixEnv, runScript, driverUrl, offer.getSlaveId.getValue, + offer.getHostname, numCores, appId)) } else { // Grab everything to the first '.'. 
We'll use that and '*' to // glob the directory "correctly". val basename = uri.split('/').last.split('.').head command.setValue( - ("cd %s*; " + + ("cd %s*; %s " + "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d %s") - .format(basename, driverUrl, offer.getSlaveId.getValue, + .format(basename, prefixEnv, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores, appId)) command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) } @@ -238,8 +243,7 @@ private[spark] class CoarseMesosSchedulerBackend( for (r <- res if r.getName == name) { return r.getScalar.getValue } - // If we reached here, no resource with the required name was present - throw new IllegalArgumentException("No resource called " + name + " in " + res) + 0 } /** Build a Mesos resource protobuf object */ diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index e0f2fd622f54c..c3c546be6da15 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -22,14 +22,17 @@ import java.util.{ArrayList => JArrayList, List => JList} import java.util.Collections import scala.collection.JavaConversions._ -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.collection.mutable.{HashMap, HashSet} import org.apache.mesos.protobuf.ByteString import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ -import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} +import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, + ExecutorInfo => MesosExecutorInfo, _} +import org.apache.spark.executor.MesosExecutorBackend import org.apache.spark.{Logging, SparkContext, SparkException, TaskState} +import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.scheduler._ import org.apache.spark.util.Utils @@ -62,6 +65,9 @@ private[spark] class MesosSchedulerBackend( var classLoader: ClassLoader = null + // The listener bus to publish executor added/removed events. 
+ val listenerBus = sc.listenerBus + @volatile var appId: String = _ override def start() { @@ -72,7 +78,7 @@ private[spark] class MesosSchedulerBackend( setDaemon(true) override def run() { val scheduler = MesosSchedulerBackend.this - val fwInfo = FrameworkInfo.newBuilder().setUser("").setName(sc.appName).build() + val fwInfo = FrameworkInfo.newBuilder().setUser(sc.sparkUser).setName(sc.appName).build() driver = new MesosSchedulerDriver(scheduler, fwInfo, master) try { val ret = driver.run() @@ -87,7 +93,7 @@ private[spark] class MesosSchedulerBackend( } } - def createExecutorInfo(execId: String): ExecutorInfo = { + def createExecutorInfo(execId: String): MesosExecutorInfo = { val executorSparkHome = sc.conf.getOption("spark.mesos.executor.home") .orElse(sc.getSparkHome()) // Fall back to driver Spark home for backward compatibility .getOrElse { @@ -98,15 +104,16 @@ private[spark] class MesosSchedulerBackend( environment.addVariables( Environment.Variable.newBuilder().setName("SPARK_CLASSPATH").setValue(cp).build()) } - val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions") - val extraLibraryPath = sc.conf.getOption("spark.executor.extraLibraryPath").map { lp => - s"-Djava.library.path=$lp" - } - val extraOpts = Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ") + val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions").getOrElse("") + + val prefixEnv = sc.conf.getOption("spark.executor.extraLibraryPath").map { p => + Utils.libraryPathEnvPrefix(Seq(p)) + }.getOrElse("") + environment.addVariables( Environment.Variable.newBuilder() .setName("SPARK_EXECUTOR_OPTS") - .setValue(extraOpts) + .setValue(extraJavaOpts) .build()) sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() @@ -117,13 +124,15 @@ private[spark] class MesosSchedulerBackend( val command = CommandInfo.newBuilder() .setEnvironment(environment) val uri = sc.conf.get("spark.executor.uri", null) + val executorBackendName = classOf[MesosExecutorBackend].getName if (uri == null) { - command.setValue(new File(executorSparkHome, "/sbin/spark-executor").getCanonicalPath) + val executorPath = new File(executorSparkHome, "/bin/spark-class").getCanonicalPath + command.setValue(s"$prefixEnv $executorPath $executorBackendName") } else { // Grab everything to the first '.'. We'll use that and '*' to // glob the directory "correctly". 
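createExecutorInfo above now launches the executor through bin/spark-class with MesosExecutorBackend as the main class, optionally prefixed by a library-path environment setting; when spark.executor.uri is set, the command first cds into the unpacked distribution using a glob built from the archive's basename (the lines that follow). A small sketch of the two command shapes, using made-up paths and an empty prefix (the real prefix comes from Utils.libraryPathEnvPrefix):

object MesosCommandSketch extends App {
  val backend   = "org.apache.spark.executor.MesosExecutorBackend"
  val prefixEnv = ""                       // stand-in for Utils.libraryPathEnvPrefix(...)

  // No spark.executor.uri: run spark-class from the executor's Spark home.
  val sparkHome = "/opt/spark"             // hypothetical executor Spark home
  println(s"$prefixEnv $sparkHome/bin/spark-class $backend".trim)

  // With spark.executor.uri: everything up to the first '.' plus '*' globs the unpacked dir.
  val uri      = "hdfs:///dist/spark-1.3.0-bin-hadoop2.4.tgz"   // hypothetical URI
  val basename = uri.split('/').last.split('.').head            // "spark-1"
  println(s"cd ${basename}*; $prefixEnv ./bin/spark-class $backend")
}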
val basename = uri.split('/').last.split('.').head - command.setValue("cd %s*; ./sbin/spark-executor".format(basename)) + command.setValue(s"cd ${basename}*; $prefixEnv ./bin/spark-class $executorBackendName") command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) } val cpus = Resource.newBuilder() @@ -139,7 +148,7 @@ private[spark] class MesosSchedulerBackend( Value.Scalar.newBuilder() .setValue(MemoryUtils.calculateTotalMemory(sc)).build()) .build() - ExecutorInfo.newBuilder() + MesosExecutorInfo.newBuilder() .setExecutorId(ExecutorID.newBuilder().setValue(execId).build()) .setCommand(command) .setData(ByteString.copyFrom(createExecArg())) @@ -164,29 +173,16 @@ private[spark] class MesosSchedulerBackend( execArgs } - private def setClassLoader(): ClassLoader = { - val oldClassLoader = Thread.currentThread.getContextClassLoader - Thread.currentThread.setContextClassLoader(classLoader) - oldClassLoader - } - - private def restoreClassLoader(oldClassLoader: ClassLoader) { - Thread.currentThread.setContextClassLoader(oldClassLoader) - } - override def offerRescinded(d: SchedulerDriver, o: OfferID) {} override def registered(d: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo) { - val oldClassLoader = setClassLoader() - try { + inClassLoader() { appId = frameworkId.getValue logInfo("Registered as framework ID " + appId) registeredLock.synchronized { isRegistered = true registeredLock.notifyAll() } - } finally { - restoreClassLoader(oldClassLoader) } } @@ -198,6 +194,16 @@ private[spark] class MesosSchedulerBackend( } } + private def inClassLoader()(fun: => Unit) = { + val oldClassLoader = Thread.currentThread.getContextClassLoader + Thread.currentThread.setContextClassLoader(classLoader) + try { + fun + } finally { + Thread.currentThread.setContextClassLoader(oldClassLoader) + } + } + override def disconnected(d: SchedulerDriver) {} override def reregistered(d: SchedulerDriver, masterInfo: MasterInfo) {} @@ -208,66 +214,75 @@ private[spark] class MesosSchedulerBackend( * tasks are balanced across the cluster. */ override def resourceOffers(d: SchedulerDriver, offers: JList[Offer]) { - val oldClassLoader = setClassLoader() - try { - synchronized { - // Build a big list of the offerable workers, and remember their indices so that we can - // figure out which Offer to reply to for each worker - val offerableWorkers = new ArrayBuffer[WorkerOffer] - val offerableIndices = new HashMap[String, Int] - - def sufficientOffer(o: Offer) = { - val mem = getResource(o.getResourcesList, "mem") - val cpus = getResource(o.getResourcesList, "cpus") - val slaveId = o.getSlaveId.getValue - (mem >= MemoryUtils.calculateTotalMemory(sc) && - // need at least 1 for executor, 1 for task - cpus >= 2 * scheduler.CPUS_PER_TASK) || - (slaveIdsWithExecutors.contains(slaveId) && - cpus >= scheduler.CPUS_PER_TASK) - } + inClassLoader() { + // Fail-fast on offers we know will be rejected + val (usableOffers, unUsableOffers) = offers.partition { o => + val mem = getResource(o.getResourcesList, "mem") + val cpus = getResource(o.getResourcesList, "cpus") + val slaveId = o.getSlaveId.getValue + // TODO(pwendell): Should below be 1 + scheduler.CPUS_PER_TASK? 
+ (mem >= MemoryUtils.calculateTotalMemory(sc) && + // need at least 1 for executor, 1 for task + cpus >= 2 * scheduler.CPUS_PER_TASK) || + (slaveIdsWithExecutors.contains(slaveId) && + cpus >= scheduler.CPUS_PER_TASK) + } - for ((offer, index) <- offers.zipWithIndex if sufficientOffer(offer)) { - val slaveId = offer.getSlaveId.getValue - offerableIndices.put(slaveId, index) - val cpus = if (slaveIdsWithExecutors.contains(slaveId)) { - getResource(offer.getResourcesList, "cpus").toInt - } else { - // If the executor doesn't exist yet, subtract CPU for executor - getResource(offer.getResourcesList, "cpus").toInt - - scheduler.CPUS_PER_TASK - } - offerableWorkers += new WorkerOffer( - offer.getSlaveId.getValue, - offer.getHostname, - cpus) + val workerOffers = usableOffers.map { o => + val cpus = if (slaveIdsWithExecutors.contains(o.getSlaveId.getValue)) { + getResource(o.getResourcesList, "cpus").toInt + } else { + // If the executor doesn't exist yet, subtract CPU for executor + // TODO(pwendell): Should below just subtract "1"? + getResource(o.getResourcesList, "cpus").toInt - + scheduler.CPUS_PER_TASK } + new WorkerOffer( + o.getSlaveId.getValue, + o.getHostname, + cpus) + } + + val slaveIdToOffer = usableOffers.map(o => o.getSlaveId.getValue -> o).toMap + val slaveIdToWorkerOffer = workerOffers.map(o => o.executorId -> o).toMap + + val mesosTasks = new HashMap[String, JArrayList[MesosTaskInfo]] - // Call into the TaskSchedulerImpl - val taskLists = scheduler.resourceOffers(offerableWorkers) - - // Build a list of Mesos tasks for each slave - val mesosTasks = offers.map(o => new JArrayList[MesosTaskInfo]()) - for ((taskList, index) <- taskLists.zipWithIndex) { - if (!taskList.isEmpty) { - for (taskDesc <- taskList) { - val slaveId = taskDesc.executorId - val offerNum = offerableIndices(slaveId) - slaveIdsWithExecutors += slaveId - taskIdToSlaveId(taskDesc.taskId) = slaveId - mesosTasks(offerNum).add(createMesosTask(taskDesc, slaveId)) - } + val slavesIdsOfAcceptedOffers = HashSet[String]() + + // Call into the TaskSchedulerImpl + val acceptedOffers = scheduler.resourceOffers(workerOffers).filter(!_.isEmpty) + acceptedOffers + .foreach { offer => + offer.foreach { taskDesc => + val slaveId = taskDesc.executorId + slaveIdsWithExecutors += slaveId + slavesIdsOfAcceptedOffers += slaveId + taskIdToSlaveId(taskDesc.taskId) = slaveId + mesosTasks.getOrElseUpdate(slaveId, new JArrayList[MesosTaskInfo]) + .add(createMesosTask(taskDesc, slaveId)) } } - // Reply to the offers - val filters = Filters.newBuilder().setRefuseSeconds(1).build() // TODO: lower timeout? - for (i <- 0 until offers.size) { - d.launchTasks(Collections.singleton(offers(i).getId), mesosTasks(i), filters) - } + // Reply to the offers + val filters = Filters.newBuilder().setRefuseSeconds(1).build() // TODO: lower timeout? 
+ + mesosTasks.foreach { case (slaveId, tasks) => + slaveIdToWorkerOffer.get(slaveId).foreach(o => + listenerBus.post(SparkListenerExecutorAdded(System.currentTimeMillis(), slaveId, + new ExecutorInfo(o.host, o.cores))) + ) + d.launchTasks(Collections.singleton(slaveIdToOffer(slaveId).getId), tasks, filters) } - } finally { - restoreClassLoader(oldClassLoader) + + // Decline offers that weren't used + // NOTE: This logic assumes that we only get a single offer for each host in a given batch + for (o <- usableOffers if !slavesIdsOfAcceptedOffers.contains(o.getSlaveId.getValue)) { + d.declineOffer(o.getId) + } + + // Decline offers we ruled out immediately + unUsableOffers.foreach(o => d.declineOffer(o.getId)) } } @@ -276,8 +291,7 @@ private[spark] class MesosSchedulerBackend( for (r <- res if r.getName == name) { return r.getScalar.getValue } - // If we reached here, no resource with the required name was present - throw new IllegalArgumentException("No resource called " + name + " in " + res) + 0 } /** Turn a Spark TaskDescription into a Mesos task */ @@ -294,7 +308,7 @@ private[spark] class MesosSchedulerBackend( .setExecutor(createExecutorInfo(slaveId)) .setName(task.name) .addResources(cpuResource) - .setData(ByteString.copyFrom(task.serializedTask)) + .setData(MesosTaskLaunchData(task.serializedTask, task.attemptNumber).toByteString) .build() } @@ -307,32 +321,26 @@ private[spark] class MesosSchedulerBackend( } override def statusUpdate(d: SchedulerDriver, status: TaskStatus) { - val oldClassLoader = setClassLoader() - try { + inClassLoader() { val tid = status.getTaskId.getValue.toLong val state = TaskState.fromMesos(status.getState) synchronized { if (status.getState == MesosTaskState.TASK_LOST && taskIdToSlaveId.contains(tid)) { // We lost the executor on this slave, so remember that it's gone - slaveIdsWithExecutors -= taskIdToSlaveId(tid) + removeExecutor(taskIdToSlaveId(tid), "Lost executor") } if (isFinished(status.getState)) { taskIdToSlaveId.remove(tid) } } scheduler.statusUpdate(tid, state, status.getData.asReadOnlyByteBuffer) - } finally { - restoreClassLoader(oldClassLoader) } } override def error(d: SchedulerDriver, message: String) { - val oldClassLoader = setClassLoader() - try { + inClassLoader() { logError("Mesos error: " + message) scheduler.error(message) - } finally { - restoreClassLoader(oldClassLoader) } } @@ -348,16 +356,21 @@ private[spark] class MesosSchedulerBackend( override def frameworkMessage(d: SchedulerDriver, e: ExecutorID, s: SlaveID, b: Array[Byte]) {} + /** + * Remove executor associated with slaveId in a thread safe manner. 
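The rewritten resourceOffers above fails fast: it partitions the incoming offers into usable and unusable ones, converts the usable ones into WorkerOffers (reserving one executor's worth of CPUs on slaves that do not yet run an executor), and declines everything that was not used. A standalone sketch of just that filtering, where Offer stands in for the Mesos protobuf and the memory/CPU constants are invented:

case class Offer(slaveId: String, mem: Double, cpus: Double)

object OfferFilterSketch extends App {
  val cpusPerTask    = 1
  val executorMemory = 1024.0          // what MemoryUtils.calculateTotalMemory would return
  val slavesWithExec = Set("s1")       // slaves that already run an executor

  val offers = Seq(Offer("s1", 512, 1), Offer("s2", 2048, 2), Offer("s3", 512, 1))

  // Usable: room for executor + one task, or an executor already exists and one task fits.
  val (usable, unusable) = offers.partition { o =>
    (o.mem >= executorMemory && o.cpus >= 2 * cpusPerTask) ||
      (slavesWithExec.contains(o.slaveId) && o.cpus >= cpusPerTask)
  }

  // Cores offered to the TaskScheduler: subtract the executor's CPU if it must be started.
  val workerCores = usable.map { o =>
    val cores =
      if (slavesWithExec.contains(o.slaveId)) o.cpus.toInt
      else o.cpus.toInt - cpusPerTask
    o.slaveId -> cores
  }

  println(usable.map(_.slaveId))    // List(s1, s2)
  println(unusable.map(_.slaveId))  // List(s3)
  println(workerCores)              // List((s1,1), (s2,1))
}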
+ */ + private def removeExecutor(slaveId: String, reason: String) = { + synchronized { + listenerBus.post(SparkListenerExecutorRemoved(System.currentTimeMillis(), slaveId, reason)) + slaveIdsWithExecutors -= slaveId + } + } + private def recordSlaveLost(d: SchedulerDriver, slaveId: SlaveID, reason: ExecutorLossReason) { - val oldClassLoader = setClassLoader() - try { + inClassLoader() { logInfo("Mesos slave lost: " + slaveId.getValue) - synchronized { - slaveIdsWithExecutors -= slaveId.getValue - } + removeExecutor(slaveId.getValue, reason.toString) scheduler.executorLost(slaveId.getValue, reason) - } finally { - restoreClassLoader(oldClassLoader) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchData.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchData.scala new file mode 100644 index 0000000000000..5e7e6567a3e06 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchData.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster.mesos + +import java.nio.ByteBuffer + +import org.apache.mesos.protobuf.ByteString + +import org.apache.spark.Logging + +/** + * Wrapper for serializing the data sent when launching Mesos tasks. 
+ */ +private[spark] case class MesosTaskLaunchData( + serializedTask: ByteBuffer, + attemptNumber: Int) extends Logging { + + def toByteString: ByteString = { + val dataBuffer = ByteBuffer.allocate(4 + serializedTask.limit) + dataBuffer.putInt(attemptNumber) + dataBuffer.put(serializedTask) + dataBuffer.rewind + logDebug(s"ByteBuffer size: [${dataBuffer.remaining}]") + ByteString.copyFrom(dataBuffer) + } +} + +private[spark] object MesosTaskLaunchData extends Logging { + def fromByteString(byteString: ByteString): MesosTaskLaunchData = { + val byteBuffer = byteString.asReadOnlyByteBuffer() + logDebug(s"ByteBuffer size: [${byteBuffer.remaining}]") + val attemptNumber = byteBuffer.getInt // updates the position by 4 bytes + val serializedTask = byteBuffer.slice() // subsequence starting at the current position + MesosTaskLaunchData(serializedTask, attemptNumber) + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala index 58b78f041cd85..4676b828d3d89 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala @@ -19,9 +19,11 @@ package org.apache.spark.scheduler.local import java.nio.ByteBuffer +import scala.concurrent.duration._ + import akka.actor.{Actor, ActorRef, Props} -import org.apache.spark.{Logging, SparkEnv, TaskState} +import org.apache.spark.{Logging, SparkContext, SparkEnv, TaskState} import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.{Executor, ExecutorBackend} import org.apache.spark.scheduler.{SchedulerBackend, TaskSchedulerImpl, WorkerOffer} @@ -41,17 +43,20 @@ private case class StopExecutor() * and the TaskSchedulerImpl. 
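MesosTaskLaunchData above defines a tiny wire format: a 4-byte attempt number followed by the serialized task bytes, wrapped in a protobuf ByteString. A JDK-only roundtrip of the same layout (ByteString is dropped so the sketch needs nothing beyond java.nio; the payload string is sample data):

import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets

object TaskLaunchDataSketch extends App {
  val attemptNumber  = 2
  val serializedTask = ByteBuffer.wrap("task-bytes".getBytes(StandardCharsets.UTF_8))

  // Encode: int header + payload, then rewind so the buffer can be read from the start.
  val encoded = ByteBuffer.allocate(4 + serializedTask.limit)
  encoded.putInt(attemptNumber)
  encoded.put(serializedTask)
  encoded.rewind()

  // Decode: getInt advances the position by 4; slice() is the remaining payload.
  val decodedAttempt = encoded.getInt
  val payload        = encoded.slice()
  val bytes          = new Array[Byte](payload.remaining)
  payload.get(bytes)

  println(decodedAttempt)                             // 2
  println(new String(bytes, StandardCharsets.UTF_8))  // task-bytes
}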
*/ private[spark] class LocalActor( - scheduler: TaskSchedulerImpl, - executorBackend: LocalBackend, - private val totalCores: Int) extends Actor with ActorLogReceive with Logging { + scheduler: TaskSchedulerImpl, + executorBackend: LocalBackend, + private val totalCores: Int) + extends Actor with ActorLogReceive with Logging { + + import context.dispatcher // to use Akka's scheduler.scheduleOnce() private var freeCores = totalCores - private val localExecutorId = "localhost" + private val localExecutorId = SparkContext.DRIVER_IDENTIFIER private val localExecutorHostname = "localhost" - val executor = new Executor( - localExecutorId, localExecutorHostname, scheduler.conf.getAll, isLocal = true) + private val executor = new Executor( + localExecutorId, localExecutorHostname, SparkEnv.get, isLocal = true) override def receiveWithLogging = { case ReviveOffers => @@ -73,9 +78,15 @@ private[spark] class LocalActor( def reviveOffers() { val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores)) - for (task <- scheduler.resourceOffers(offers).flatten) { + val tasks = scheduler.resourceOffers(offers).flatten + for (task <- tasks) { freeCores -= scheduler.CPUS_PER_TASK - executor.launchTask(executorBackend, task.taskId, task.name, task.serializedTask) + executor.launchTask(executorBackend, taskId = task.taskId, attemptNumber = task.attemptNumber, + task.name, task.serializedTask) + } + if (tasks.isEmpty && scheduler.activeTaskSets.nonEmpty) { + // Try to reviveOffer after 1 second, because scheduler may wait for locality timeout + context.system.scheduler.scheduleOnce(1000 millis, self, ReviveOffers) } } } diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 554a33ce7f1a6..1baa0e009f3ae 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -27,7 +27,8 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.ByteBufferInputStream import org.apache.spark.util.Utils -private[spark] class JavaSerializationStream(out: OutputStream, counterReset: Int) +private[spark] class JavaSerializationStream( + out: OutputStream, counterReset: Int, extraDebugInfo: Boolean) extends SerializationStream { private val objOut = new ObjectOutputStream(out) private var counter = 0 @@ -39,7 +40,12 @@ private[spark] class JavaSerializationStream(out: OutputStream, counterReset: In * the stream 'resets' object class descriptions have to be re-written) */ def writeObject[T: ClassTag](t: T): SerializationStream = { - objOut.writeObject(t) + try { + objOut.writeObject(t) + } catch { + case e: NotSerializableException if extraDebugInfo => + throw SerializationDebugger.improveException(t, e) + } counter += 1 if (counterReset > 0 && counter >= counterReset) { objOut.reset() @@ -64,7 +70,8 @@ extends DeserializationStream { } -private[spark] class JavaSerializerInstance(counterReset: Int, defaultClassLoader: ClassLoader) +private[spark] class JavaSerializerInstance( + counterReset: Int, extraDebugInfo: Boolean, defaultClassLoader: ClassLoader) extends SerializerInstance { override def serialize[T: ClassTag](t: T): ByteBuffer = { @@ -88,11 +95,11 @@ private[spark] class JavaSerializerInstance(counterReset: Int, defaultClassLoade } override def serializeStream(s: OutputStream): SerializationStream = { - new JavaSerializationStream(s, counterReset) + new 
JavaSerializationStream(s, counterReset, extraDebugInfo) } override def deserializeStream(s: InputStream): DeserializationStream = { - new JavaDeserializationStream(s, Utils.getContextOrSparkClassLoader) + new JavaDeserializationStream(s, defaultClassLoader) } def deserializeStream(s: InputStream, loader: ClassLoader): DeserializationStream = { @@ -111,17 +118,20 @@ private[spark] class JavaSerializerInstance(counterReset: Int, defaultClassLoade @DeveloperApi class JavaSerializer(conf: SparkConf) extends Serializer with Externalizable { private var counterReset = conf.getInt("spark.serializer.objectStreamReset", 100) + private var extraDebugInfo = conf.getBoolean("spark.serializer.extraDebugInfo", true) override def newInstance(): SerializerInstance = { val classLoader = defaultClassLoader.getOrElse(Thread.currentThread.getContextClassLoader) - new JavaSerializerInstance(counterReset, classLoader) + new JavaSerializerInstance(counterReset, extraDebugInfo, classLoader) } - override def writeExternal(out: ObjectOutput) { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { out.writeInt(counterReset) + out.writeBoolean(extraDebugInfo) } - override def readExternal(in: ObjectInput) { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { counterReset = in.readInt() + extraDebugInfo = in.readBoolean() } } diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 621a951c27d07..d56e23ce4478a 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -26,9 +26,10 @@ import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializ import com.twitter.chill.{AllScalaRegistrar, EmptyScalaKryoInstantiator} import org.apache.spark._ +import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.HttpBroadcast import org.apache.spark.network.nio.{PutBlock, GotBlock, GetBlock} -import org.apache.spark.scheduler.MapStatus +import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ import org.apache.spark.util.BoundedPriorityQueue import org.apache.spark.util.collection.CompactBuffer @@ -90,6 +91,7 @@ class KryoSerializer(conf: SparkConf) // Allow sending SerializableWritable kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) + kryo.register(classOf[PythonBroadcast], new KryoJavaSerializer()) try { // Use the default classloader when calling the user registrator. @@ -205,7 +207,8 @@ private[serializer] object KryoSerializer { classOf[PutBlock], classOf[GotBlock], classOf[GetBlock], - classOf[MapStatus], + classOf[CompressedMapStatus], + classOf[HighlyCompressedMapStatus], classOf[CompactBuffer[_]], classOf[BlockManagerId], classOf[Array[Byte]], diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala new file mode 100644 index 0000000000000..cecb992579655 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.serializer + +import java.io.{NotSerializableException, ObjectOutput, ObjectStreamClass, ObjectStreamField} +import java.lang.reflect.{Field, Method} +import java.security.AccessController + +import scala.annotation.tailrec +import scala.collection.mutable + +import org.apache.spark.Logging + +private[serializer] object SerializationDebugger extends Logging { + + /** + * Improve the given NotSerializableException with the serialization path leading from the given + * object to the problematic object. This is turned off automatically if + * `sun.io.serialization.extendedDebugInfo` flag is turned on for the JVM. + */ + def improveException(obj: Any, e: NotSerializableException): NotSerializableException = { + if (enableDebugging && reflect != null) { + new NotSerializableException( + e.getMessage + "\nSerialization stack:\n" + find(obj).map("\t- " + _).mkString("\n")) + } else { + e + } + } + + /** + * Find the path leading to a not serializable object. This method is modeled after OpenJDK's + * serialization mechanism, and handles the following cases: + * - primitives + * - arrays of primitives + * - arrays of non-primitive objects + * - Serializable objects + * - Externalizable objects + * - writeReplace + * + * It does not yet handle writeObject override, but that shouldn't be too hard to do either. + */ + def find(obj: Any): List[String] = { + new SerializationDebugger().visit(obj, List.empty) + } + + private[serializer] var enableDebugging: Boolean = { + !AccessController.doPrivileged(new sun.security.action.GetBooleanAction( + "sun.io.serialization.extendedDebugInfo")).booleanValue() + } + + private class SerializationDebugger { + + /** A set to track the list of objects we have visited, to avoid cycles in the graph. */ + private val visited = new mutable.HashSet[Any] + + /** + * Visit the object and its fields and stop when we find an object that is not serializable. + * Return the path as a list. If everything can be serialized, return an empty list. + */ + def visit(o: Any, stack: List[String]): List[String] = { + if (o == null) { + List.empty + } else if (visited.contains(o)) { + List.empty + } else { + visited += o + o match { + // Primitive value, string, and primitive arrays are always serializable + case _ if o.getClass.isPrimitive => List.empty + case _: String => List.empty + case _ if o.getClass.isArray && o.getClass.getComponentType.isPrimitive => List.empty + + // Traverse non primitive array. 
+ case a: Array[_] if o.getClass.isArray && !o.getClass.getComponentType.isPrimitive => + val elem = s"array (class ${a.getClass.getName}, size ${a.length})" + visitArray(o.asInstanceOf[Array[_]], elem :: stack) + + case e: java.io.Externalizable => + val elem = s"externalizable object (class ${e.getClass.getName}, $e)" + visitExternalizable(e, elem :: stack) + + case s: Object with java.io.Serializable => + val elem = s"object (class ${s.getClass.getName}, $s)" + visitSerializable(s, elem :: stack) + + case _ => + // Found an object that is not serializable! + s"object not serializable (class: ${o.getClass.getName}, value: $o)" :: stack + } + } + } + + private def visitArray(o: Array[_], stack: List[String]): List[String] = { + var i = 0 + while (i < o.length) { + val childStack = visit(o(i), s"element of array (index: $i)" :: stack) + if (childStack.nonEmpty) { + return childStack + } + i += 1 + } + return List.empty + } + + private def visitExternalizable(o: java.io.Externalizable, stack: List[String]): List[String] = + { + val fieldList = new ListObjectOutput + o.writeExternal(fieldList) + val childObjects = fieldList.outputArray + var i = 0 + while (i < childObjects.length) { + val childStack = visit(childObjects(i), "writeExternal data" :: stack) + if (childStack.nonEmpty) { + return childStack + } + i += 1 + } + return List.empty + } + + private def visitSerializable(o: Object, stack: List[String]): List[String] = { + // An object contains multiple slots in serialization. + // Get the slots and visit fields in all of them. + val (finalObj, desc) = findObjectAndDescriptor(o) + val slotDescs = desc.getSlotDescs + var i = 0 + while (i < slotDescs.length) { + val slotDesc = slotDescs(i) + if (slotDesc.hasWriteObjectMethod) { + // TODO: Handle classes that specify writeObject method. + } else { + val fields: Array[ObjectStreamField] = slotDesc.getFields + val objFieldValues: Array[Object] = new Array[Object](slotDesc.getNumObjFields) + val numPrims = fields.length - objFieldValues.length + desc.getObjFieldValues(finalObj, objFieldValues) + + var j = 0 + while (j < objFieldValues.length) { + val fieldDesc = fields(numPrims + j) + val elem = s"field (class: ${slotDesc.getName}" + + s", name: ${fieldDesc.getName}" + + s", type: ${fieldDesc.getType})" + val childStack = visit(objFieldValues(j), elem :: stack) + if (childStack.nonEmpty) { + return childStack + } + j += 1 + } + + } + i += 1 + } + return List.empty + } + } + + /** + * Find the object to serialize and the associated [[ObjectStreamClass]]. This method handles + * writeReplace in Serializable. It starts with the object itself, and keeps calling the + * writeReplace method until there is no more + */ + @tailrec + private def findObjectAndDescriptor(o: Object): (Object, ObjectStreamClass) = { + val cl = o.getClass + val desc = ObjectStreamClass.lookupAny(cl) + if (!desc.hasWriteReplaceMethod) { + (o, desc) + } else { + // write place + findObjectAndDescriptor(desc.invokeWriteReplace(o)) + } + } + + /** + * A dummy [[ObjectOutput]] that simply saves the list of objects written by a writeExternal + * call, and returns them through `outputArray`. 
+ */ + private class ListObjectOutput extends ObjectOutput { + private val output = new mutable.ArrayBuffer[Any] + def outputArray: Array[Any] = output.toArray + override def writeObject(o: Any): Unit = output += o + override def flush(): Unit = {} + override def write(i: Int): Unit = {} + override def write(bytes: Array[Byte]): Unit = {} + override def write(bytes: Array[Byte], i: Int, i1: Int): Unit = {} + override def close(): Unit = {} + override def writeFloat(v: Float): Unit = {} + override def writeChars(s: String): Unit = {} + override def writeDouble(v: Double): Unit = {} + override def writeUTF(s: String): Unit = {} + override def writeShort(i: Int): Unit = {} + override def writeInt(i: Int): Unit = {} + override def writeBoolean(b: Boolean): Unit = {} + override def writeBytes(s: String): Unit = {} + override def writeChar(i: Int): Unit = {} + override def writeLong(l: Long): Unit = {} + override def writeByte(i: Int): Unit = {} + } + + /** An implicit class that allows us to call private methods of ObjectStreamClass. */ + implicit class ObjectStreamClassMethods(val desc: ObjectStreamClass) extends AnyVal { + def getSlotDescs: Array[ObjectStreamClass] = { + reflect.GetClassDataLayout.invoke(desc).asInstanceOf[Array[Object]].map { + classDataSlot => reflect.DescField.get(classDataSlot).asInstanceOf[ObjectStreamClass] + } + } + + def hasWriteObjectMethod: Boolean = { + reflect.HasWriteObjectMethod.invoke(desc).asInstanceOf[Boolean] + } + + def hasWriteReplaceMethod: Boolean = { + reflect.HasWriteReplaceMethod.invoke(desc).asInstanceOf[Boolean] + } + + def invokeWriteReplace(obj: Object): Object = { + reflect.InvokeWriteReplace.invoke(desc, obj) + } + + def getNumObjFields: Int = { + reflect.GetNumObjFields.invoke(desc).asInstanceOf[Int] + } + + def getObjFieldValues(obj: Object, out: Array[Object]): Unit = { + reflect.GetObjFieldValues.invoke(desc, obj, out) + } + } + + /** + * Object to hold all the reflection objects. If we run on a JVM that we cannot understand, + * this field will be null and this the debug helper should be disabled. 
+ */ + private val reflect: ObjectStreamClassReflection = try { + new ObjectStreamClassReflection + } catch { + case e: Exception => + logWarning("Cannot find private methods using reflection", e) + null + } + + private class ObjectStreamClassReflection { + /** ObjectStreamClass.getClassDataLayout */ + val GetClassDataLayout: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod("getClassDataLayout") + f.setAccessible(true) + f + } + + /** ObjectStreamClass.hasWriteObjectMethod */ + val HasWriteObjectMethod: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod("hasWriteObjectMethod") + f.setAccessible(true) + f + } + + /** ObjectStreamClass.hasWriteReplaceMethod */ + val HasWriteReplaceMethod: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod("hasWriteReplaceMethod") + f.setAccessible(true) + f + } + + /** ObjectStreamClass.invokeWriteReplace */ + val InvokeWriteReplace: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod("invokeWriteReplace", classOf[Object]) + f.setAccessible(true) + f + } + + /** ObjectStreamClass.getNumObjFields */ + val GetNumObjFields: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod("getNumObjFields") + f.setAccessible(true) + f + } + + /** ObjectStreamClass.getObjFieldValues */ + val GetObjFieldValues: Method = { + val f = classOf[ObjectStreamClass].getDeclaredMethod( + "getObjFieldValues", classOf[Object], classOf[Array[Object]]) + f.setAccessible(true) + f + } + + /** ObjectStreamClass$ClassDataSlot.desc field */ + val DescField: Field = { + val f = Class.forName("java.io.ObjectStreamClass$ClassDataSlot").getDeclaredField("desc") + f.setAccessible(true) + f + } + } +} diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index a9144cdd97b8c..ca6e971d227fb 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -17,14 +17,14 @@ package org.apache.spark.serializer -import java.io.{ByteArrayOutputStream, EOFException, InputStream, OutputStream} +import java.io._ import java.nio.ByteBuffer import scala.reflect.ClassTag -import org.apache.spark.SparkEnv +import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.util.{ByteBufferInputStream, NextIterator} +import org.apache.spark.util.{Utils, ByteBufferInputStream, NextIterator} /** * :: DeveloperApi :: diff --git a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala index 71c08e9d5a8c3..be184464e0ae9 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala @@ -19,6 +19,7 @@ package org.apache.spark.shuffle import org.apache.spark.storage.BlockManagerId import org.apache.spark.{FetchFailed, TaskEndReason} +import org.apache.spark.util.Utils /** * Failed to fetch a shuffle block. 
The executor catches this exception and propagates it @@ -30,13 +31,22 @@ private[spark] class FetchFailedException( bmAddress: BlockManagerId, shuffleId: Int, mapId: Int, - reduceId: Int) - extends Exception { + reduceId: Int, + message: String, + cause: Throwable = null) + extends Exception(message, cause) { - override def getMessage: String = - "Fetch failed: %s %d %d %d".format(bmAddress, shuffleId, mapId, reduceId) + def this( + bmAddress: BlockManagerId, + shuffleId: Int, + mapId: Int, + reduceId: Int, + cause: Throwable) { + this(bmAddress, shuffleId, mapId, reduceId, cause.getMessage, cause) + } - def toTaskEndReason: TaskEndReason = FetchFailed(bmAddress, shuffleId, mapId, reduceId) + def toTaskEndReason: TaskEndReason = FetchFailed(bmAddress, shuffleId, mapId, reduceId, + Utils.exceptionString(this)) } /** @@ -46,7 +56,4 @@ private[spark] class MetadataFetchFailedException( shuffleId: Int, reduceId: Int, message: String) - extends FetchFailedException(null, shuffleId, -1, reduceId) { - - override def getMessage: String = message -} + extends FetchFailedException(null, shuffleId, -1, reduceId, message) diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala index 439981d232349..7de2f9cbb2866 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala @@ -24,9 +24,10 @@ import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConversions._ -import org.apache.spark.{SparkEnv, SparkConf, Logging} +import org.apache.spark.{Logging, SparkConf, SparkEnv} import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.network.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.FileShuffleBlockManager.ShuffleFileGroup import org.apache.spark.storage._ @@ -62,11 +63,14 @@ private[spark] trait ShuffleWriterGroup { * each block stored in each file. In order to find the location of a shuffle block, we search the * files within a ShuffleFileGroups associated with the block's reducer. */ - +// Note: Changes to the format in this file should be kept in sync with +// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getHashBasedShuffleBlockData(). private[spark] class FileShuffleBlockManager(conf: SparkConf) extends ShuffleBlockManager with Logging { + private val transportConf = SparkTransportConf.fromSparkConf(conf) + private lazy val blockManager = SparkEnv.get.blockManager // Turning off shuffle file consolidation causes all shuffle Blocks to get their own file. 
@@ -181,13 +185,14 @@ class FileShuffleBlockManager(conf: SparkConf) val segmentOpt = iter.next.getFileSegmentFor(blockId.mapId, blockId.reduceId) if (segmentOpt.isDefined) { val segment = segmentOpt.get - return new FileSegmentManagedBuffer(segment.file, segment.offset, segment.length) + return new FileSegmentManagedBuffer( + transportConf, segment.file, segment.offset, segment.length) } } throw new IllegalStateException("Failed to find shuffle block: " + blockId) } else { val file = blockManager.diskBlockManager.getFile(blockId) - new FileSegmentManagedBuffer(file, 0, file.length) + new FileSegmentManagedBuffer(transportConf, file, 0, file.length) } } diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala index 4ab34336d3f01..b292587d37028 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala @@ -20,8 +20,11 @@ package org.apache.spark.shuffle import java.io._ import java.nio.ByteBuffer -import org.apache.spark.SparkEnv -import org.apache.spark.network.{ManagedBuffer, FileSegmentManagedBuffer} +import com.google.common.io.ByteStreams + +import org.apache.spark.{SparkConf, SparkEnv} +import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.storage._ /** @@ -33,11 +36,15 @@ import org.apache.spark.storage._ * as the filename postfix for data file, and ".index" as the filename postfix for index file. * */ +// Note: Changes to the format in this file should be kept in sync with +// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getSortBasedShuffleBlockData(). private[spark] -class IndexShuffleBlockManager extends ShuffleBlockManager { +class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockManager { private lazy val blockManager = SparkEnv.get.blockManager + private val transportConf = SparkTransportConf.fromSparkConf(conf) + /** * Mapping to a single shuffleBlockId with reduce ID 0. 
* */ @@ -101,10 +108,11 @@ class IndexShuffleBlockManager extends ShuffleBlockManager { val in = new DataInputStream(new FileInputStream(indexFile)) try { - in.skip(blockId.reduceId * 8) + ByteStreams.skipFully(in, blockId.reduceId * 8) val offset = in.readLong() val nextOffset = in.readLong() new FileSegmentManagedBuffer( + transportConf, getDataFile(blockId.shuffleId, blockId.mapId), offset, nextOffset - offset) diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala index 63863cc0250a3..b521f0c7fc77e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala @@ -18,8 +18,7 @@ package org.apache.spark.shuffle import java.nio.ByteBuffer - -import org.apache.spark.network.ManagedBuffer +import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.storage.ShuffleBlockId private[spark] diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala index 801ae54086053..a44a8e1249256 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -20,8 +20,8 @@ package org.apache.spark.shuffle import org.apache.spark.{TaskContext, ShuffleDependency} /** - * Pluggable interface for shuffle systems. A ShuffleManager is created in SparkEnv on both the - * driver and executors, based on the spark.shuffle.manager setting. The driver registers shuffles + * Pluggable interface for shuffle systems. A ShuffleManager is created in SparkEnv on the driver + * and on each executor, based on the spark.shuffle.manager setting. The driver registers shuffles * with it, and executors (or tasks running locally in the driver) can ask to read and write data. 
* * NOTE: this will be instantiated by SparkEnv so its constructor can take a SparkConf and diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala index ee91a368b76ea..3bcc7178a3d8b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala @@ -66,8 +66,9 @@ private[spark] class ShuffleMemoryManager(maxMemory: Long) extends Logging { val curMem = threadMemory(threadId) val freeMemory = maxMemory - threadMemory.values.sum - // How much we can grant this thread; don't let it grow to more than 1 / numActiveThreads - val maxToGrant = math.min(numBytes, (maxMemory / numActiveThreads) - curMem) + // How much we can grant this thread; don't let it grow to more than 1 / numActiveThreads; + // don't let it be negative + val maxToGrant = math.min(numBytes, math.max(0, (maxMemory / numActiveThreads) - curMem)) if (curMem < maxMemory / (2 * numActiveThreads)) { // We want to let each thread get at least 1 / (2 * numActiveThreads) before blocking; diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala index b30e366d06006..292e48314ee10 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleReader.scala @@ -24,6 +24,10 @@ private[spark] trait ShuffleReader[K, C] { /** Read the combined key-values for this reduce task */ def read(): Iterator[Product2[K, C]] - /** Close this reader */ - def stop(): Unit + /** + * Close this reader. + * TODO: Add this back when we make the ShuffleReader a developer API that others can implement + * (at which point this will likely be necessary). 
+ */ + // def stop(): Unit } diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala index 6cf9305977a3c..e3e7434df45b0 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala @@ -19,6 +19,7 @@ package org.apache.spark.shuffle.hash import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap +import scala.util.{Failure, Success, Try} import org.apache.spark._ import org.apache.spark.serializer.Serializer @@ -52,21 +53,21 @@ private[hash] object BlockStoreShuffleFetcher extends Logging { (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2))) } - def unpackBlock(blockPair: (BlockId, Option[Iterator[Any]])) : Iterator[T] = { + def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = { val blockId = blockPair._1 val blockOption = blockPair._2 blockOption match { - case Some(block) => { + case Success(block) => { block.asInstanceOf[Iterator[T]] } - case None => { + case Failure(e) => { blockId match { case ShuffleBlockId(shufId, mapId, _) => val address = statuses(mapId.toInt)._1 - throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId) + throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e) case _ => throw new SparkException( - "Failed to get block " + blockId + ", which is not a shuffle block") + "Failed to get block " + blockId + ", which is not a shuffle block", e) } } } @@ -74,7 +75,7 @@ private[hash] object BlockStoreShuffleFetcher extends Logging { val blockFetcherItr = new ShuffleBlockFetcherIterator( context, - SparkEnv.get.blockTransferService, + SparkEnv.get.blockManager.shuffleClient, blockManager, blocksByAddress, serializer, diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala index 88a5f1e5ddf58..41bafabde05b9 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala @@ -45,9 +45,9 @@ private[spark] class HashShuffleReader[K, C]( } else { new InterruptibleIterator(context, dep.aggregator.get.combineValuesByKey(iter, context)) } - } else if (dep.aggregator.isEmpty && dep.mapSideCombine) { - throw new IllegalStateException("Aggregator is empty for map-side combine") } else { + require(!dep.mapSideCombine, "Map-side combine without Aggregator specified!") + // Convert the Product2s to pairs since this is what downstream RDDs currently expect iter.asInstanceOf[Iterator[Product2[K, C]]].map(pair => (pair._1, pair._2)) } @@ -59,14 +59,11 @@ private[spark] class HashShuffleReader[K, C]( // the ExternalSorter won't spill to disk. val sorter = new ExternalSorter[K, C, C](ordering = Some(keyOrd), serializer = Some(ser)) sorter.insertAll(aggregatedIter) - context.taskMetrics.memoryBytesSpilled += sorter.memoryBytesSpilled - context.taskMetrics.diskBytesSpilled += sorter.diskBytesSpilled + context.taskMetrics.incMemoryBytesSpilled(sorter.memoryBytesSpilled) + context.taskMetrics.incDiskBytesSpilled(sorter.diskBytesSpilled) sorter.iterator case None => aggregatedIter } } - - /** Close this reader */ - override def stop(): Unit = ??? 
} diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala index 746ed33b54c00..755f17d6aa15a 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala @@ -56,9 +56,8 @@ private[spark] class HashShuffleWriter[K, V]( } else { records } - } else if (dep.aggregator.isEmpty && dep.mapSideCombine) { - throw new IllegalStateException("Aggregator is empty for map-side combine") } else { + require(!dep.mapSideCombine, "Map-side combine without Aggregator specified!") records } @@ -107,7 +106,7 @@ private[spark] class HashShuffleWriter[K, V]( writer.commitAndClose() writer.fileSegment().length } - MapStatus(blockManager.blockManagerId, sizes) + MapStatus(blockManager.shuffleServerId, sizes) } private def revertWrites(): Unit = { diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index b727438ae7e47..bda30a56d808e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -25,7 +25,7 @@ import org.apache.spark.shuffle.hash.HashShuffleReader private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager { - private val indexShuffleBlockManager = new IndexShuffleBlockManager() + private val indexShuffleBlockManager = new IndexShuffleBlockManager(conf) private val shuffleMapNumber = new ConcurrentHashMap[Int, Int]() /** diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index 927481b72cf4f..27496c5a289cb 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -50,9 +50,7 @@ private[spark] class SortShuffleWriter[K, V, C]( /** Write a bunch of records to this task's output */ override def write(records: Iterator[_ <: Product2[K, V]]): Unit = { if (dep.mapSideCombine) { - if (!dep.aggregator.isDefined) { - throw new IllegalStateException("Aggregator is empty for map-side combine") - } + require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!") sorter = new ExternalSorter[K, V, C]( dep.aggregator, Some(dep.partitioner), dep.keyOrdering, dep.serializer) sorter.insertAll(records) @@ -70,7 +68,7 @@ private[spark] class SortShuffleWriter[K, V, C]( val partitionLengths = sorter.writePartitionedFile(blockId, context, outputFile) shuffleBlockManager.writeIndexFile(dep.shuffleId, mapId, partitionLengths) - mapStatus = MapStatus(blockManager.blockManagerId, partitionLengths) + mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths) } /** Close this writer, passing along whether the map completed */ diff --git a/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala b/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala deleted file mode 100644 index 5b6d086630834..0000000000000 --- a/core/src/main/scala/org/apache/spark/storage/BlockDataProvider.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.storage - -import java.nio.ByteBuffer - - -/** - * An interface for providing data for blocks. - * - * getBlockData returns either a FileSegment (for zero-copy send), or a ByteBuffer. - * - * Aside from unit tests, [[BlockManager]] is the main class that implements this. - */ -private[spark] trait BlockDataProvider { - def getBlockData(blockId: String): Either[FileSegment, ByteBuffer] -} diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 8df5ec6bde184..1f012941c85ab 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -53,6 +53,8 @@ case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId { def name = "rdd_" + rddId + "_" + splitIndex } +// Format of the shuffle block ids (including data and index) should be kept in sync with +// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getBlockData(). @DeveloperApi case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { def name = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 4cc97923658bc..8bc5a1cd18b64 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -17,14 +17,12 @@ package org.apache.spark.storage -import java.io.{File, InputStream, OutputStream, BufferedOutputStream, ByteArrayOutputStream} +import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, OutputStream} import java.nio.{ByteBuffer, MappedByteBuffer} -import scala.concurrent.ExecutionContext.Implicits.global - -import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.concurrent.{Await, Future} +import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.util.Random @@ -35,11 +33,15 @@ import org.apache.spark._ import org.apache.spark.executor._ import org.apache.spark.io.CompressionCodec import org.apache.spark.network._ +import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.ExternalShuffleClient +import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.ShuffleManager +import org.apache.spark.shuffle.hash.HashShuffleManager import org.apache.spark.util._ - private[spark] sealed trait BlockValues private[spark] case class ByteBufferValues(buffer: ByteBuffer) extends BlockValues 
private[spark] case class IteratorValues(iterator: Iterator[Any]) extends BlockValues @@ -51,9 +53,15 @@ private[spark] class BlockResult( readMethod: DataReadMethod.Value, bytes: Long) { val inputMetrics = new InputMetrics(readMethod) - inputMetrics.bytesRead = bytes + inputMetrics.addBytesRead(bytes) } +/** + * Manager running on every node (driver and executors) which provides interfaces for putting and + * retrieving blocks both locally and remotely into various stores (memory, disk, and off-heap). + * + * Note that #initialize() must be called before the BlockManager is usable. + */ private[spark] class BlockManager( executorId: String, actorSystem: ActorSystem, @@ -63,11 +71,11 @@ private[spark] class BlockManager( val conf: SparkConf, mapOutputTracker: MapOutputTracker, shuffleManager: ShuffleManager, - blockTransferService: BlockTransferService) + blockTransferService: BlockTransferService, + securityManager: SecurityManager, + numUsableCores: Int) extends BlockDataManager with Logging { - blockTransferService.init(this) - val diskBlockManager = new DiskBlockManager(this, conf) private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] @@ -87,8 +95,37 @@ private[spark] class BlockManager( new TachyonStore(this, tachyonBlockManager) } - val blockManagerId = BlockManagerId( - executorId, blockTransferService.hostName, blockTransferService.port) + private[spark] + val externalShuffleServiceEnabled = conf.getBoolean("spark.shuffle.service.enabled", false) + + // Port used by the external shuffle service. In Yarn mode, this may be already be + // set through the Hadoop configuration as the server is launched in the Yarn NM. + private val externalShuffleServicePort = + Utils.getSparkOrYarnConfig(conf, "spark.shuffle.service.port", "7337").toInt + + // Check that we're not using external shuffle service with consolidated shuffle files. + if (externalShuffleServiceEnabled + && conf.getBoolean("spark.shuffle.consolidateFiles", false) + && shuffleManager.isInstanceOf[HashShuffleManager]) { + throw new UnsupportedOperationException("Cannot use external shuffle service with consolidated" + + " shuffle files in hash-based shuffle. Please disable spark.shuffle.consolidateFiles or " + + " switch to sort-based shuffle.") + } + + var blockManagerId: BlockManagerId = _ + + // Address of the server that serves this executor's shuffle files. This is either an external + // service, or just our own Executor's BlockManager. + private[spark] var shuffleServerId: BlockManagerId = _ + + // Client to read other executors' shuffle files. This is either an external service, or just the + // standard BlockTransferService to directly connect to other Executors. + private[spark] val shuffleClient = if (externalShuffleServiceEnabled) { + val transConf = SparkTransportConf.fromSparkConf(conf, numUsableCores) + new ExternalShuffleClient(transConf, securityManager, securityManager.isAuthenticationEnabled()) + } else { + blockTransferService + } // Whether to compress broadcast variables that are stored private val compressBroadcast = conf.getBoolean("spark.broadcast.compress", true) @@ -118,8 +155,6 @@ private[spark] class BlockManager( private val peerFetchLock = new Object private var lastPeerFetchTime = 0L - initialize() - /* The compression codec to use. Note that the "lazy" val is necessary because we want to delay * the initialization of the compression codec until it is first used. 
The reason is that a Spark * program could be using a user-defined codec in a third party jar, which is loaded in @@ -138,17 +173,66 @@ private[spark] class BlockManager( conf: SparkConf, mapOutputTracker: MapOutputTracker, shuffleManager: ShuffleManager, - blockTransferService: BlockTransferService) = { + blockTransferService: BlockTransferService, + securityManager: SecurityManager, + numUsableCores: Int) = { this(execId, actorSystem, master, serializer, BlockManager.getMaxMemory(conf), - conf, mapOutputTracker, shuffleManager, blockTransferService) + conf, mapOutputTracker, shuffleManager, blockTransferService, securityManager, numUsableCores) } /** - * Initialize the BlockManager. Register to the BlockManagerMaster, and start the - * BlockManagerWorker actor. + * Initializes the BlockManager with the given appId. This is not performed in the constructor as + * the appId may not be known at BlockManager instantiation time (in particular for the driver, + * where it is only learned after registration with the TaskScheduler). + * + * This method initializes the BlockTransferService and ShuffleClient, registers with the + * BlockManagerMaster, starts the BlockManagerWorker actor, and registers with a local shuffle + * service if configured. */ - private def initialize(): Unit = { + def initialize(appId: String): Unit = { + blockTransferService.init(this) + shuffleClient.init(appId) + + blockManagerId = BlockManagerId( + executorId, blockTransferService.hostName, blockTransferService.port) + + shuffleServerId = if (externalShuffleServiceEnabled) { + BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort) + } else { + blockManagerId + } + master.registerBlockManager(blockManagerId, maxMemory, slaveActor) + + // Register Executors' configuration with the local shuffle service, if one should exist. + if (externalShuffleServiceEnabled && !blockManagerId.isDriver) { + registerWithExternalShuffleServer() + } + } + + private def registerWithExternalShuffleServer() { + logInfo("Registering executor with local external shuffle service.") + val shuffleConfig = new ExecutorShuffleInfo( + diskBlockManager.localDirs.map(_.toString), + diskBlockManager.subDirsPerLocalDir, + shuffleManager.getClass.getName) + + val MAX_ATTEMPTS = 3 + val SLEEP_TIME_SECS = 5 + + for (i <- 1 to MAX_ATTEMPTS) { + try { + // Synchronous and will throw an exception if we cannot connect. + shuffleClient.asInstanceOf[ExternalShuffleClient].registerWithShuffleServer( + shuffleServerId.host, shuffleServerId.port, shuffleServerId.executorId, shuffleConfig) + return + } catch { + case e: Exception if i < MAX_ATTEMPTS => + logError(s"Failed to connect to external shuffle server, will retry ${MAX_ATTEMPTS - i}}" + + s" more times after waiting $SLEEP_TIME_SECS seconds...", e) + Thread.sleep(SLEEP_TIME_SECS * 1000) + } + } } /** @@ -212,21 +296,20 @@ private[spark] class BlockManager( } /** - * Interface to get local block data. - * - * @return Some(buffer) if the block exists locally, and None if it doesn't. + * Interface to get local block data. Throws an exception if the block cannot be found or + * cannot be read successfully. 
*/ - override def getBlockData(blockId: String): Option[ManagedBuffer] = { - val bid = BlockId(blockId) - if (bid.isShuffle) { - Some(shuffleManager.shuffleBlockManager.getBlockData(bid.asInstanceOf[ShuffleBlockId])) + override def getBlockData(blockId: BlockId): ManagedBuffer = { + if (blockId.isShuffle) { + shuffleManager.shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]) } else { - val blockBytesOpt = doGetLocal(bid, asBlockResult = false).asInstanceOf[Option[ByteBuffer]] + val blockBytesOpt = doGetLocal(blockId, asBlockResult = false) + .asInstanceOf[Option[ByteBuffer]] if (blockBytesOpt.isDefined) { val buffer = blockBytesOpt.get - Some(new NioByteBufferManagedBuffer(buffer)) + new NioManagedBuffer(buffer) } else { - None + throw new BlockNotFoundException(blockId.toString) } } } @@ -234,8 +317,8 @@ private[spark] class BlockManager( /** * Put the block locally, using the given storage level. */ - override def putBlockData(blockId: String, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(BlockId(blockId), data.nioByteBuffer(), level) + override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { + putBytes(blockId, data.nioByteBuffer(), level) } /** @@ -340,17 +423,6 @@ private[spark] class BlockManager( locations } - /** - * A short-circuited method to get blocks directly from disk. This is used for getting - * shuffle blocks. It is safe to do so without a lock on block info since disk store - * never deletes (recent) items. - */ - def getLocalShuffleFromDisk(blockId: BlockId, serializer: Serializer): Option[Iterator[Any]] = { - val buf = shuffleManager.shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]) - val is = wrapForCompression(blockId, buf.inputStream()) - Some(serializer.newInstance().deserializeStream(is).asIterator) - } - /** * Get block from local block manager. */ @@ -520,7 +592,7 @@ private[spark] class BlockManager( for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") val data = blockTransferService.fetchBlockSync( - loc.host, loc.port, blockId.toString).nioByteBuffer() + loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer() if (data != null) { if (asBlockResult) { @@ -869,9 +941,9 @@ private[spark] class BlockManager( data.rewind() logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") blockTransferService.uploadBlockSync( - peer.host, peer.port, blockId.toString, new NioByteBufferManagedBuffer(data), tLevel) - logTrace(s"Replicated $blockId of ${data.limit()} bytes to $peer in %d ms" - .format((System.currentTimeMillis - onePeerStartTime))) + peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) + logTrace(s"Replicated $blockId of ${data.limit()} bytes to $peer in %s ms" + .format(System.currentTimeMillis - onePeerStartTime)) peersReplicatedTo += peer peersForReplication -= peer replicationFailed = false @@ -941,8 +1013,10 @@ private[spark] class BlockManager( // If we get here, the block write failed. logWarning(s"Block $blockId was marked as failure. 
Nothing to drop") return None + } else if (blockInfo.get(blockId).isEmpty) { + logWarning(s"Block $blockId was already dropped.") + return None } - var blockIsUpdated = false val level = info.level @@ -1126,7 +1200,11 @@ private[spark] class BlockManager( } def stop(): Unit = { - blockTransferService.stop() + blockTransferService.close() + if (shuffleClient ne blockTransferService) { + // Closing should be idempotent, but maybe not for the NioBlockTransferService. + shuffleClient.close() + } diskBlockManager.stop() actorSystem.stop(slaveActor) blockInfo.clear() diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index 142285094342c..b177a59c721df 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -20,6 +20,7 @@ package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap +import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils @@ -59,15 +60,15 @@ class BlockManagerId private ( def port: Int = port_ - def isDriver: Boolean = (executorId == "") + def isDriver: Boolean = { executorId == SparkContext.DRIVER_IDENTIFIER } - override def writeExternal(out: ObjectOutput) { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { out.writeUTF(executorId_) out.writeUTF(host_) out.writeInt(port_) } - override def readExternal(in: ObjectInput) { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { executorId_ = in.readUTF() host_ = in.readUTF() port_ = in.readInt() diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index d08e1419e3e41..b63c7f191155c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -88,6 +88,10 @@ class BlockManagerMaster( askDriverWithReply[Seq[BlockManagerId]](GetPeers(blockManagerId)) } + def getActorSystemHostPortForExecutor(executorId: String): Option[(String, Int)] = { + askDriverWithReply[Option[(String, Int)]](GetActorSystemHostPortForExecutor(executorId)) + } + /** * Remove a block from the slaves that have it. This can only be used to remove * blocks that the driver knows about. 
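Editor's note: the writeExternal/readExternal overrides above are now wrapped in Utils.tryOrIOException so that unexpected failures during externalization surface as IOExceptions instead of escaping Java serialization as arbitrary throwables. Below is a minimal sketch of that pattern with a simplified stand-in helper and a hypothetical ExecutorAddress class; neither is part of the patch itself.

import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}

// Simplified stand-in for Utils.tryOrIOException: rethrow any failure as an IOException so that
// readExternal/writeExternal surface a checked, expected exception type to Java serialization.
object SerdeUtil {
  def tryOrIOException[T](block: => T): T =
    try block catch {
      case e: IOException => throw e
      case t: Throwable   => throw new IOException(t)
    }
}

// Hypothetical id class following the same Externalizable pattern as BlockManagerId above.
class ExecutorAddress(var executorId: String, var host: String, var port: Int)
  extends Externalizable {

  def this() = this(null, null, 0) // no-arg constructor required for deserialization

  override def writeExternal(out: ObjectOutput): Unit = SerdeUtil.tryOrIOException {
    out.writeUTF(executorId)
    out.writeUTF(host)
    out.writeInt(port)
  }

  override def readExternal(in: ObjectInput): Unit = SerdeUtil.tryOrIOException {
    executorId = in.readUTF()
    host = in.readUTF()
    port = in.readInt()
  }
}
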
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala index 088f06e389d83..64133464d8daa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala @@ -52,8 +52,7 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus private val akkaTimeout = AkkaUtils.askTimeout(conf) - val slaveTimeout = conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", - math.max(conf.getInt("spark.executor.heartbeatInterval", 10000) * 3, 45000)) + val slaveTimeout = conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", 120 * 1000) val checkTimeoutInterval = conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60000) @@ -73,9 +72,8 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus case UpdateBlockInfo( blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) => - // TODO: Ideally we want to handle all the message replies in receive instead of in the - // individual private methods. - updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) + sender ! updateBlockInfo( + blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) case GetLocations(blockId) => sender ! getLocations(blockId) @@ -86,6 +84,9 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus case GetPeers(blockManagerId) => sender ! getPeers(blockManagerId) + case GetActorSystemHostPortForExecutor(executorId) => + sender ! getActorSystemHostPortForExecutor(executorId) + case GetMemoryStatus => sender ! memoryStatus @@ -203,6 +204,7 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus } } listenerBus.post(SparkListenerBlockManagerRemoved(System.currentTimeMillis(), blockManagerId)) + logInfo(s"Removing block manager $blockManagerId") } private def expireDeadHosts() { @@ -327,20 +329,20 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus val time = System.currentTimeMillis() if (!blockManagerInfo.contains(id)) { blockManagerIdByExecutor.get(id.executorId) match { - case Some(manager) => - // A block manager of the same executor already exists. - // This should never happen. Let's just quit. 
- logError("Got two different block manager registrations on " + id.executorId) - System.exit(1) + case Some(oldId) => + // A block manager of the same executor already exists, so remove it (assumed dead) + logError("Got two different block manager registrations on same executor - " + + s" will replace old one $oldId with new one $id") + removeExecutor(id.executorId) case None => - blockManagerIdByExecutor(id.executorId) = id } - - logInfo("Registering block manager %s with %s RAM".format( - id.hostPort, Utils.bytesToString(maxMemSize))) - - blockManagerInfo(id) = - new BlockManagerInfo(id, time, maxMemSize, slaveActor) + logInfo("Registering block manager %s with %s RAM, %s".format( + id.hostPort, Utils.bytesToString(maxMemSize), id)) + + blockManagerIdByExecutor(id.executorId) = id + + blockManagerInfo(id) = new BlockManagerInfo( + id, System.currentTimeMillis(), maxMemSize, slaveActor) } listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxMemSize)) } @@ -351,23 +353,21 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus storageLevel: StorageLevel, memSize: Long, diskSize: Long, - tachyonSize: Long) { + tachyonSize: Long): Boolean = { if (!blockManagerInfo.contains(blockManagerId)) { if (blockManagerId.isDriver && !isLocal) { // We intentionally do not register the master (except in local mode), // so we should not indicate failure. - sender ! true + return true } else { - sender ! false + return false } - return } if (blockId == null) { blockManagerInfo(blockManagerId).updateLastSeenMs() - sender ! true - return + return true } blockManagerInfo(blockManagerId).updateBlockInfo( @@ -391,7 +391,7 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus if (locations.size == 0) { blockLocations.remove(blockId) } - sender ! true + true } private def getLocations(blockId: BlockId): Seq[BlockManagerId] = { @@ -411,6 +411,21 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus Seq.empty } } + + /** + * Returns the hostname and port of an executor's actor system, based on the Akka address of its + * BlockManagerSlaveActor. + */ + private def getActorSystemHostPortForExecutor(executorId: String): Option[(String, Int)] = { + for ( + blockManagerId <- blockManagerIdByExecutor.get(executorId); + info <- blockManagerInfo.get(blockManagerId); + host <- info.slaveActor.path.address.host; + port <- info.slaveActor.path.address.port + ) yield { + (host, port) + } + } } @DeveloperApi diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala index 3db5dd9774ae8..3f32099d08cc9 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala @@ -21,6 +21,8 @@ import java.io.{Externalizable, ObjectInput, ObjectOutput} import akka.actor.ActorRef +import org.apache.spark.util.Utils + private[spark] object BlockManagerMessages { ////////////////////////////////////////////////////////////////////////////////// // Messages from the master to slaves. 
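Editor's note: the new getActorSystemHostPortForExecutor above resolves an executor's actor-system host and port only when every intermediate lookup succeeds, which the patch expresses as a for-comprehension over Options. A hedged, self-contained sketch of that pattern follows; SlaveRef and the map names are invented for illustration and are not the types used in the patch.

object ActorLookupSketch {
  case class SlaveRef(host: Option[String], port: Option[Int])

  // Chain the lookups: any missing link short-circuits the whole result to None.
  def hostPortForExecutor(
      executorId: String,
      idByExecutor: Map[String, String],
      slavesById: Map[String, SlaveRef]): Option[(String, Int)] = {
    for {
      blockManagerId <- idByExecutor.get(executorId)
      slave <- slavesById.get(blockManagerId)
      host <- slave.host
      port <- slave.port
    } yield (host, port)
  }

  // hostPortForExecutor("exec-1", Map("exec-1" -> "bm-1"),
  //   Map("bm-1" -> SlaveRef(Some("10.0.0.5"), Some(56231))))  == Some(("10.0.0.5", 56231))
}
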
@@ -65,7 +67,7 @@ private[spark] object BlockManagerMessages { def this() = this(null, null, null, 0, 0, 0) // For deserialization only - override def writeExternal(out: ObjectOutput) { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { blockManagerId.writeExternal(out) out.writeUTF(blockId.name) storageLevel.writeExternal(out) @@ -74,7 +76,7 @@ private[spark] object BlockManagerMessages { out.writeLong(tachyonSize) } - override def readExternal(in: ObjectInput) { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { blockManagerId = BlockManagerId(in) blockId = BlockId(in.readUTF()) storageLevel = StorageLevel(in) @@ -90,6 +92,8 @@ private[spark] object BlockManagerMessages { case class GetPeers(blockManagerId: BlockManagerId) extends ToBlockManagerMaster + case class GetActorSystemHostPortForExecutor(executorId: String) extends ToBlockManagerMaster + case class RemoveExecutor(execId: String) extends ToBlockManagerMaster case object StopBlockManagerMaster extends ToBlockManagerMaster diff --git a/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala b/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala index 9ef453605f4f1..81f5f2d31dbd8 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockNotFoundException.scala @@ -17,5 +17,4 @@ package org.apache.spark.storage - class BlockNotFoundException(blockId: String) extends Exception(s"Block $blockId not found") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 9c469370ffe1f..3198d766fca37 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -160,14 +160,14 @@ private[spark] class DiskBlockObjectWriter( } finalPosition = file.length() // In certain compression codecs, more bytes are written after close() is called - writeMetrics.shuffleBytesWritten += (finalPosition - reportedPosition) + writeMetrics.incShuffleBytesWritten(finalPosition - reportedPosition) } // Discard current writes. We do this by flushing the outstanding writes and then // truncating the file to its initial position. 
override def revertPartialWritesAndClose() { try { - writeMetrics.shuffleBytesWritten -= (reportedPosition - initialPosition) + writeMetrics.decShuffleBytesWritten(reportedPosition - initialPosition) if (initialized) { objOut.flush() @@ -212,14 +212,14 @@ private[spark] class DiskBlockObjectWriter( */ private def updateBytesWritten() { val pos = channel.position() - writeMetrics.shuffleBytesWritten += (pos - reportedPosition) + writeMetrics.incShuffleBytesWritten(pos - reportedPosition) reportedPosition = pos } private def callWithTiming(f: => Unit) = { val start = System.nanoTime() f - writeMetrics.shuffleWriteTime += (System.nanoTime() - start) + writeMetrics.incShuffleWriteTime(System.nanoTime() - start) } // For testing diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 6633a1db57e59..53eaedacbf291 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -17,9 +17,8 @@ package org.apache.spark.storage -import java.io.File -import java.text.SimpleDateFormat -import java.util.{Date, Random, UUID} +import java.util.UUID +import java.io.{IOException, File} import org.apache.spark.{SparkConf, Logging} import org.apache.spark.executor.ExecutorExitCode @@ -37,13 +36,13 @@ import org.apache.spark.util.Utils private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkConf) extends Logging { - private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 - private val subDirsPerLocalDir = blockManager.conf.getInt("spark.diskStore.subDirectories", 64) + private[spark] + val subDirsPerLocalDir = blockManager.conf.getInt("spark.diskStore.subDirectories", 64) /* Create one local directory for each path mentioned in spark.local.dir; then, inside this * directory, create multiple subdirectories that we will hash files into, in order to avoid * having really large inodes at the top level. */ - val localDirs: Array[File] = createLocalDirs(conf) + private[spark] val localDirs: Array[File] = createLocalDirs(conf) if (localDirs.isEmpty) { logError("Failed to create any local dir.") System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR) @@ -52,6 +51,9 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon addShutdownHook() + /** Looks up a file by hashing it into one of our local subdirectories. */ + // This method should be kept in sync with + // org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getFile(). 
def getFile(filename: String): File = { // Figure out which local directory it hashes to, and which subdirectory in that val hash = Utils.nonNegativeHash(filename) @@ -67,7 +69,9 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon old } else { val newDir = new File(localDirs(dirId), "%02x".format(subDirId)) - newDir.mkdir() + if (!newDir.exists() && !newDir.mkdir()) { + throw new IOException(s"Failed to create local dir in $newDir.") + } subDirs(dirId)(subDirId) = newDir newDir } @@ -117,39 +121,20 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon } private def createLocalDirs(conf: SparkConf): Array[File] = { - val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss") Utils.getOrCreateLocalRootDirs(conf).flatMap { rootDir => - var foundLocalDir = false - var localDir: File = null - var localDirId: String = null - var tries = 0 - val rand = new Random() - while (!foundLocalDir && tries < MAX_DIR_CREATION_ATTEMPTS) { - tries += 1 - try { - localDirId = "%s-%04x".format(dateFormat.format(new Date), rand.nextInt(65536)) - localDir = new File(rootDir, s"spark-local-$localDirId") - if (!localDir.exists) { - foundLocalDir = localDir.mkdirs() - } - } catch { - case e: Exception => - logWarning(s"Attempt $tries to create local dir $localDir failed", e) - } - } - if (!foundLocalDir) { - logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir." + - " Ignoring this directory.") - None - } else { + try { + val localDir = Utils.createDirectory(rootDir, "blockmgr") logInfo(s"Created local directory at $localDir") Some(localDir) + } catch { + case e: IOException => + logError(s"Failed to create local dir in $rootDir. Ignoring this directory.", e) + None } } } private def addShutdownHook() { - localDirs.foreach(localDir => Utils.registerShutdownDeleteDir(localDir)) Runtime.getRuntime.addShutdownHook(new Thread("delete Spark local dirs") { override def run(): Unit = Utils.logUncaughtExceptions { logDebug("Shutdown hook called") @@ -160,13 +145,16 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon /** Cleanup local dirs and stop shuffle sender. */ private[spark] def stop() { - localDirs.foreach { localDir => - if (localDir.isDirectory() && localDir.exists()) { - try { - if (!Utils.hasRootAsShutdownDeleteDir(localDir)) Utils.deleteRecursively(localDir) - } catch { - case e: Exception => - logError(s"Exception while deleting local spark dir: $localDir", e) + // Only perform cleanup if an external service is not serving our shuffle files. 
+ if (!blockManager.externalShuffleServiceEnabled || blockManager.blockManagerId.isDriver) { + localDirs.foreach { localDir => + if (localDir.isDirectory() && localDir.exists()) { + try { + if (!Utils.hasRootAsShutdownDeleteDir(localDir)) Utils.deleteRecursively(localDir) + } catch { + case e: Exception => + logError(s"Exception while deleting local spark dir: $localDir", e) + } } } } diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index bac459e835a3f..61ef5ff168791 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.{File, FileOutputStream, RandomAccessFile} +import java.io.{IOException, File, FileOutputStream, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode @@ -31,7 +31,8 @@ import org.apache.spark.util.Utils private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBlockManager) extends BlockStore(blockManager) with Logging { - val minMemoryMapBytes = blockManager.conf.getLong("spark.storage.memoryMapThreshold", 2 * 4096L) + val minMemoryMapBytes = blockManager.conf.getLong( + "spark.storage.memoryMapThreshold", 2 * 1024L * 1024L) override def getSize(blockId: BlockId): Long = { diskManager.getFile(blockId.name).length @@ -110,7 +111,13 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc // For small files, directly read rather than memory map if (length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(length.toInt) - channel.read(buf, offset) + channel.position(offset) + while (buf.remaining() != 0) { + if (channel.read(buf) == -1) { + throw new IOException("Reached EOF before filling buffer\n" + + s"offset=$offset\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") + } + } buf.flip() Some(buf) } else { diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index edbc729c17ade..71305a46bf570 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -56,6 +56,16 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) (maxMemory * unrollFraction).toLong } + // Initial memory to request before unrolling any block + private val unrollMemoryThreshold: Long = + conf.getLong("spark.storage.unrollMemoryThreshold", 1024 * 1024) + + if (maxMemory < unrollMemoryThreshold) { + logWarning(s"Max memory ${Utils.bytesToString(maxMemory)} is less than the initial memory " + + s"threshold ${Utils.bytesToString(unrollMemoryThreshold)} needed to store a block in " + + s"memory. Please configure Spark with more memory.") + } + logInfo("MemoryStore started with capacity %s".format(Utils.bytesToString(maxMemory))) /** Free memory not occupied by existing blocks. Note that this does not include unroll memory. */ @@ -213,7 +223,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) // Whether there is still enough memory for us to continue unrolling this block var keepUnrolling = true // Initial per-thread memory to request for unrolling blocks (bytes). Exposed for testing. 
- val initialMemoryThreshold = conf.getLong("spark.storage.unrollMemoryThreshold", 1024 * 1024) + val initialMemoryThreshold = unrollMemoryThreshold // How often to check whether we need to request more memory val memoryCheckPeriod = 16 // Memory currently reserved by this thread for this particular unrolling operation @@ -228,6 +238,11 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) // Request enough memory to begin unrolling keepUnrolling = reserveUnrollMemoryForThisThread(initialMemoryThreshold) + if (!keepUnrolling) { + logWarning(s"Failed to reserve initial memory threshold of " + + s"${Utils.bytesToString(initialMemoryThreshold)} for computing block $blockId in memory.") + } + // Unroll this block safely, checking whether we have exceeded our threshold periodically try { while (values.hasNext && keepUnrolling) { diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index 71b276b5f18e4..ab9ee4f0096bf 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -17,17 +17,18 @@ package org.apache.spark.storage +import java.io.{InputStream, IOException} import java.util.concurrent.LinkedBlockingQueue -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashSet -import scala.collection.mutable.Queue +import scala.collection.mutable.{ArrayBuffer, HashSet, Queue} +import scala.util.{Failure, Success, Try} -import org.apache.spark.{TaskContext, Logging} -import org.apache.spark.network.{ManagedBuffer, BlockFetchingListener, BlockTransferService} +import org.apache.spark.{Logging, TaskContext} +import org.apache.spark.network.BlockTransferService +import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient} +import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.serializer.Serializer -import org.apache.spark.util.Utils - +import org.apache.spark.util.{CompletionIterator, Utils} /** * An iterator that fetches multiple blocks. For local blocks, it fetches from the local block @@ -40,8 +41,8 @@ import org.apache.spark.util.Utils * using too much memory. * * @param context [[TaskContext]], used for metrics update - * @param blockTransferService [[BlockTransferService]] for fetching remote blocks - * @param blockManager [[BlockManager]] for reading local blocks + * @param shuffleClient [[ShuffleClient]] for fetching remote blocks + * @param blockManager [[BlockManager]] for reading local blocks * @param blocksByAddress list of blocks to fetch grouped by the [[BlockManagerId]]. * For each block we also require the size (in bytes as a long field) in * order to throttle the memory usage. 
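A simplified sketch (not part of the patch) of the unroll control flow that the MemoryStore hunks above adjust: the shared unrollMemoryThreshold is reserved up front, a warning is logged if even that reservation fails, and unrolling stops as soon as memory can no longer be reserved. Here reserveMemory stands in for reserveUnrollMemoryForThisThread, and the periodic size estimation is omitted.

object UnrollSketch {
  def unrollSafely[T](values: Iterator[T],
                      initialMemoryThreshold: Long,
                      reserveMemory: Long => Boolean): Option[Vector[T]] = {
    var keepUnrolling = reserveMemory(initialMemoryThreshold)
    if (!keepUnrolling) {
      println(s"Failed to reserve initial memory threshold of $initialMemoryThreshold bytes")
    }
    val buffer = Vector.newBuilder[T]
    while (values.hasNext && keepUnrolling) {
      buffer += values.next()
      // The real code periodically estimates the buffer's size and tries to reserve
      // more unroll memory; keepUnrolling flips to false if that reservation fails.
    }
    if (keepUnrolling) Some(buffer.result()) else None
  }
}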
@@ -51,12 +52,12 @@ import org.apache.spark.util.Utils private[spark] final class ShuffleBlockFetcherIterator( context: TaskContext, - blockTransferService: BlockTransferService, + shuffleClient: ShuffleClient, blockManager: BlockManager, blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])], serializer: Serializer, maxBytesInFlight: Long) - extends Iterator[(BlockId, Option[Iterator[Any]])] with Logging { + extends Iterator[(BlockId, Try[Iterator[Any]])] with Logging { import ShuffleBlockFetcherIterator._ @@ -88,17 +89,53 @@ final class ShuffleBlockFetcherIterator( */ private[this] val results = new LinkedBlockingQueue[FetchResult] - // Queue of fetch requests to issue; we'll pull requests off this gradually to make sure that - // the number of bytes in flight is limited to maxBytesInFlight + /** + * Current [[FetchResult]] being processed. We track this so we can release the current buffer + * in case of a runtime exception when processing the current buffer. + */ + @volatile private[this] var currentResult: FetchResult = null + + /** + * Queue of fetch requests to issue; we'll pull requests off this gradually to make sure that + * the number of bytes in flight is limited to maxBytesInFlight. + */ private[this] val fetchRequests = new Queue[FetchRequest] - // Current bytes in flight from our requests + /** Current bytes in flight from our requests */ private[this] var bytesInFlight = 0L private[this] val shuffleMetrics = context.taskMetrics.createShuffleReadMetricsForDependency() + /** + * Whether the iterator is still active. If isZombie is true, the callback interface will no + * longer place fetched blocks into [[results]]. + */ + @volatile private[this] var isZombie = false + initialize() + /** + * Mark the iterator as zombie, and release all buffers that haven't been deserialized yet. + */ + private[this] def cleanup() { + isZombie = true + // Release the current buffer if necessary + currentResult match { + case SuccessFetchResult(_, _, buf) => buf.release() + case _ => + } + + // Release buffers in the results queue + val iter = results.iterator() + while (iter.hasNext) { + val result = iter.next() + result match { + case SuccessFetchResult(_, _, buf) => buf.release() + case _ => + } + } + } + private[this] def sendRequest(req: FetchRequest) { logDebug("Sending request for %d blocks (%s) from %s".format( req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort)) @@ -108,26 +145,26 @@ final class ShuffleBlockFetcherIterator( val sizeMap = req.blocks.map { case (blockId, size) => (blockId.toString, size) }.toMap val blockIds = req.blocks.map(_._1.toString) - blockTransferService.fetchBlocks(req.address.host, req.address.port, blockIds, + val address = req.address + shuffleClient.fetchBlocks(address.host, address.port, address.executorId, blockIds.toArray, new BlockFetchingListener { - override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { - results.put(new FetchResult(BlockId(blockId), sizeMap(blockId), - () => serializer.newInstance().deserializeStream( - blockManager.wrapForCompression(BlockId(blockId), data.inputStream())).asIterator - )) - shuffleMetrics.remoteBytesRead += data.size - shuffleMetrics.remoteBlocksFetched += 1 - logDebug("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime)) + override def onBlockFetchSuccess(blockId: String, buf: ManagedBuffer): Unit = { + // Only add the buffer to results queue if the iterator is not zombie, + // i.e. cleanup() has not been called yet. 
+ if (!isZombie) { + // Increment the ref count because we need to pass this to a different thread. + // This needs to be released after use. + buf.retain() + results.put(new SuccessFetchResult(BlockId(blockId), sizeMap(blockId), buf)) + shuffleMetrics.incRemoteBytesRead(buf.size) + shuffleMetrics.incRemoteBlocksFetched(1) + } + logTrace("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime)) } - override def onBlockFetchFailure(e: Throwable): Unit = { + override def onBlockFetchFailure(blockId: String, e: Throwable): Unit = { logError(s"Failed to get block(s) from ${req.address.host}:${req.address.port}", e) - // Note that there is a chance that some blocks have been fetched successfully, but we - // still add them to the failed queue. This is fine because when the caller see a - // FetchFailedException, it is going to fail the entire task anyway. - for ((blockId, size) <- req.blocks) { - results.put(new FetchResult(blockId, -1, null)) - } + results.put(new FailureFetchResult(BlockId(blockId), e)) } } ) @@ -138,7 +175,7 @@ final class ShuffleBlockFetcherIterator( // smaller than maxBytesInFlight is to allow multiple, parallel fetches from up to 5 // nodes, rather than blocking on reading output from one node. val targetRequestSize = math.max(maxBytesInFlight / 5, 1L) - logInfo("maxBytesInFlight: " + maxBytesInFlight + ", targetRequestSize: " + targetRequestSize) + logDebug("maxBytesInFlight: " + maxBytesInFlight + ", targetRequestSize: " + targetRequestSize) // Split local and remote blocks. Remote blocks are further split into FetchRequests of size // at most maxBytesInFlight in order to limit the amount of data in flight. @@ -148,7 +185,7 @@ final class ShuffleBlockFetcherIterator( var totalBlocks = 0 for ((address, blockInfos) <- blocksByAddress) { totalBlocks += blockInfos.size - if (address == blockManager.blockManagerId) { + if (address.executorId == blockManager.blockManagerId.executorId) { // Filter out zero-sized blocks localBlocks ++= blockInfos.filter(_._2 != 0).map(_._1) numBlocksToFetch += localBlocks.size @@ -185,26 +222,34 @@ final class ShuffleBlockFetcherIterator( remoteRequests } + /** + * Fetch the local blocks while we are fetching remote blocks. This is ok because + * [[ManagedBuffer]]'s memory is allocated lazily when we create the input stream, so all we + * track in-memory are the ManagedBuffer references themselves. + */ private[this] def fetchLocalBlocks() { - // Get the local blocks while remote blocks are being fetched. Note that it's okay to do - // these all at once because they will just memory-map some files, so they won't consume - // any memory that might exceed our maxBytesInFlight - for (id <- localBlocks) { + val iter = localBlocks.iterator + while (iter.hasNext) { + val blockId = iter.next() try { - shuffleMetrics.localBlocksFetched += 1 - results.put(new FetchResult( - id, 0, () => blockManager.getLocalShuffleFromDisk(id, serializer).get)) - logDebug("Got local block " + id) + val buf = blockManager.getBlockData(blockId) + shuffleMetrics.incLocalBlocksFetched(1) + buf.retain() + results.put(new SuccessFetchResult(blockId, 0, buf)) } catch { case e: Exception => + // If we see an exception, stop immediately. logError(s"Error occurred while fetching local blocks", e) - results.put(new FetchResult(id, -1, null)) + results.put(new FailureFetchResult(blockId, e)) return } } } private[this] def initialize(): Unit = { + // Add a task completion callback (called in both success case and failure case) to cleanup. 
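A sketch (not part of the patch) of how splitLocalRemoteBlocks above groups remote blocks into requests of at most maxBytesInFlight / 5 bytes, so that several fetches can run in parallel without blocking on a single node. Block and address types are reduced to plain strings here.

object FetchRequestSplitter {
  case class Req(blocks: Seq[(String, Long)]) { val size = blocks.map(_._2).sum }

  def splitIntoRequests(blocks: Seq[(String, Long)], maxBytesInFlight: Long): Seq[Req] = {
    val targetRequestSize = math.max(maxBytesInFlight / 5, 1L)
    val requests = Seq.newBuilder[Req]
    var current = Vector.empty[(String, Long)]
    var currentSize = 0L
    for ((blockId, size) <- blocks if size != 0) {   // zero-sized blocks are skipped
      current :+= ((blockId, size))
      currentSize += size
      if (currentSize >= targetRequestSize) {
        // Close off this request and start accumulating the next one.
        requests += Req(current)
        current = Vector.empty
        currentSize = 0L
      }
    }
    if (current.nonEmpty) requests += Req(current)
    requests.result()
  }
}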
+ context.addTaskCompletionListener(_ => cleanup()) + // Split local and remote blocks. val remoteRequests = splitLocalRemoteBlocks() // Add the remote requests into our queue in a random order @@ -221,26 +266,49 @@ final class ShuffleBlockFetcherIterator( // Get Local Blocks fetchLocalBlocks() - logDebug("Got local blocks in " + Utils.getUsedTimeMs(startTime) + " ms") + logDebug("Got local blocks in " + Utils.getUsedTimeMs(startTime)) } override def hasNext: Boolean = numBlocksProcessed < numBlocksToFetch - override def next(): (BlockId, Option[Iterator[Any]]) = { + override def next(): (BlockId, Try[Iterator[Any]]) = { numBlocksProcessed += 1 val startFetchWait = System.currentTimeMillis() - val result = results.take() + currentResult = results.take() + val result = currentResult val stopFetchWait = System.currentTimeMillis() - shuffleMetrics.fetchWaitTime += (stopFetchWait - startFetchWait) - if (!result.failed) { - bytesInFlight -= result.size + shuffleMetrics.incFetchWaitTime(stopFetchWait - startFetchWait) + + result match { + case SuccessFetchResult(_, size, _) => bytesInFlight -= size + case _ => } // Send fetch requests up to maxBytesInFlight while (fetchRequests.nonEmpty && (bytesInFlight == 0 || bytesInFlight + fetchRequests.front.size <= maxBytesInFlight)) { sendRequest(fetchRequests.dequeue()) } - (result.blockId, if (result.failed) None else Some(result.deserialize())) + + val iteratorTry: Try[Iterator[Any]] = result match { + case FailureFetchResult(_, e) => + Failure(e) + case SuccessFetchResult(blockId, _, buf) => + // There is a chance that createInputStream can fail (e.g. fetching a local file that does + // not exist, SPARK-4085). In that case, we should propagate the right exception so + // the scheduler gets a FetchFailedException. + Try(buf.createInputStream()).map { is0 => + val is = blockManager.wrapForCompression(blockId, is0) + val iter = serializer.newInstance().deserializeStream(is).asIterator + CompletionIterator[Any, Iterator[Any]](iter, { + // Once the iterator is exhausted, release the buffer and set currentResult to null + // so we don't release it again in cleanup. + currentResult = null + buf.release() + }) + } + } + + (result.blockId, iteratorTry) } } @@ -254,18 +322,35 @@ object ShuffleBlockFetcherIterator { * @param blocks Sequence of tuple, where the first element is the block id, * and the second element is the estimated size, used to calculate bytesInFlight. */ - class FetchRequest(val address: BlockManagerId, val blocks: Seq[(BlockId, Long)]) { + case class FetchRequest(address: BlockManagerId, blocks: Seq[(BlockId, Long)]) { val size = blocks.map(_._2).sum } /** - * Result of a fetch from a remote block. A failure is represented as size == -1. + * Result of a fetch from a remote block. + */ + private[storage] sealed trait FetchResult { + val blockId: BlockId + } + + /** + * Result of a fetch from a remote block successfully. * @param blockId block id * @param size estimated size of the block, used to calculate bytesInFlight. * Note that this is NOT the exact bytes. - * @param deserialize closure to return the result in the form of an Iterator. + * @param buf [[ManagedBuffer]] for the content. 
*/ - class FetchResult(val blockId: BlockId, val size: Long, val deserialize: () => Iterator[Any]) { - def failed: Boolean = size == -1 + private[storage] case class SuccessFetchResult(blockId: BlockId, size: Long, buf: ManagedBuffer) + extends FetchResult { + require(buf != null) + require(size >= 0) } + + /** + * Result of a fetch from a remote block unsuccessfully. + * @param blockId block id + * @param e the failure exception + */ + private[storage] case class FailureFetchResult(blockId: BlockId, e: Throwable) + extends FetchResult } diff --git a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index 1e35abaab5353..e5e1cf5a69a19 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -18,8 +18,10 @@ package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} +import java.util.concurrent.ConcurrentHashMap import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.util.Utils /** * :: DeveloperApi :: @@ -97,12 +99,12 @@ class StorageLevel private( ret } - override def writeExternal(out: ObjectOutput) { + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { out.writeByte(toInt) out.writeByte(_replication) } - override def readExternal(in: ObjectInput) { + override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { val flags = in.readByte() _useDisk = (flags & 8) != 0 _useMemory = (flags & 4) != 0 @@ -219,8 +221,7 @@ object StorageLevel { getCachedStorageLevel(obj) } - private[spark] val storageLevelCache = - new java.util.concurrent.ConcurrentHashMap[StorageLevel, StorageLevel]() + private[spark] val storageLevelCache = new ConcurrentHashMap[StorageLevel, StorageLevel]() private[spark] def getCachedStorageLevel(level: StorageLevel): StorageLevel = { storageLevelCache.putIfAbsent(level, level) diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala index d9066f766476e..def49e80a3605 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala @@ -19,6 +19,7 @@ package org.apache.spark.storage import scala.collection.mutable +import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ @@ -59,10 +60,9 @@ class StorageStatusListener extends SparkListener { val info = taskEnd.taskInfo val metrics = taskEnd.taskMetrics if (info != null && metrics != null) { - val execId = formatExecutorId(info.executorId) val updatedBlocks = metrics.updatedBlocks.getOrElse(Seq[(BlockId, BlockStatus)]()) if (updatedBlocks.length > 0) { - updateStorageStatus(execId, updatedBlocks) + updateStorageStatus(info.executorId, updatedBlocks) } } } @@ -88,13 +88,4 @@ class StorageStatusListener extends SparkListener { } } - /** - * In the local mode, there is a discrepancy between the executor ID according to the - * task ("localhost") and that according to SparkEnv (""). In the UI, this - * results in duplicate rows for the same executor. Thus, in this mode, we aggregate - * these two rows and use the executor ID of "" to be consistent. 
- */ - def formatExecutorId(execId: String): String = { - if (execId == "localhost") "" else execId - } } diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index 6908a59a79e60..af873034215a9 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -148,6 +148,7 @@ private[spark] class TachyonBlockManager( logError("Exception while deleting tachyon spark dir: " + tachyonDir, e) } } + client.close() } }) } diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 932b5616043b4..233d1e2b7c616 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -20,6 +20,7 @@ package org.apache.spark.storage import java.io.IOException import java.nio.ByteBuffer +import com.google.common.io.ByteStreams import tachyon.client.{ReadType, WriteType} import org.apache.spark.Logging @@ -105,25 +106,19 @@ private[spark] class TachyonStore( return None } val is = file.getInStream(ReadType.CACHE) - var buffer: ByteBuffer = null + assert (is != null) try { - if (is != null) { - val size = file.length - val bs = new Array[Byte](size.asInstanceOf[Int]) - val fetchSize = is.read(bs, 0, size.asInstanceOf[Int]) - buffer = ByteBuffer.wrap(bs) - if (fetchSize != size) { - logWarning(s"Failed to fetch the block $blockId from Tachyon: Size $size " + - s"is not equal to fetched size $fetchSize") - return None - } - } + val size = file.length + val bs = new Array[Byte](size.asInstanceOf[Int]) + ByteStreams.readFully(is, bs) + Some(ByteBuffer.wrap(bs)) } catch { case ioe: IOException => logWarning(s"Failed to fetch the block $blockId from Tachyon", ioe) - return None + None + } finally { + is.close() } - Some(buffer) } override def contains(blockId: BlockId): Boolean = { diff --git a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala new file mode 100644 index 0000000000000..27ba9e18237b5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui + +import java.util.{Timer, TimerTask} + +import org.apache.spark._ + +/** + * ConsoleProgressBar shows the progress of stages in the next line of the console. It polls the + * status of active stages from `sc.statusTracker` periodically; the progress bar is shown only + * after a stage has run for at least 500ms. If multiple stages run at the same time, their + * statuses are combined and shown on one line. + */ +private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { + + // Carriage return + val CR = '\r' + // Update period of progress bar, in milliseconds + val UPDATE_PERIOD = 200L + // Delay before showing the progress bar, in milliseconds + val FIRST_DELAY = 500L + + // The width of the terminal + val TerminalWidth = if (!sys.env.getOrElse("COLUMNS", "").isEmpty) { + sys.env.get("COLUMNS").get.toInt + } else { + 80 + } + + var lastFinishTime = 0L + var lastUpdateTime = 0L + var lastProgressBar = "" + + // Schedule a refresh thread to run periodically + private val timer = new Timer("refresh progress", true) + timer.schedule(new TimerTask{ + override def run() { + refresh() + } + }, FIRST_DELAY, UPDATE_PERIOD) + + /** + * Try to refresh the progress bar on every cycle. + */ + private def refresh(): Unit = synchronized { + val now = System.currentTimeMillis() + if (now - lastFinishTime < FIRST_DELAY) { + return + } + val stageIds = sc.statusTracker.getActiveStageIds() + val stages = stageIds.map(sc.statusTracker.getStageInfo).flatten.filter(_.numTasks() > 1) + .filter(now - _.submissionTime() > FIRST_DELAY).sortBy(_.stageId()) + if (stages.size > 0) { + show(now, stages.take(3)) // display at most 3 stages at the same time + } + } + + /** + * Show the progress bar in the console. The progress bar is displayed on the line after the + * last output and keeps overwriting itself to stay on one line. Log output is printed after + * the progress bar, which is then redrawn on a new line so that it does not overwrite the logs. + */ + private def show(now: Long, stages: Seq[SparkStageInfo]) { + val width = TerminalWidth / stages.size + val bar = stages.map { s => + val total = s.numTasks() + val header = s"[Stage ${s.stageId()}:" + val tailer = s"(${s.numCompletedTasks()} + ${s.numActiveTasks()}) / $total]" + val w = width - header.size - tailer.size + val bar = if (w > 0) { + val percent = w * s.numCompletedTasks() / total + (0 until w).map { i => + if (i < percent) "=" else if (i == percent) ">" else " " + }.mkString("") + } else { + "" + } + header + bar + tailer + }.mkString("") + + // Only refresh if the bar has changed or a minute has passed (otherwise an idle ssh + // connection may be closed) + if (bar != lastProgressBar || now - lastUpdateTime > 60 * 1000L) { + System.err.print(CR + bar) + lastUpdateTime = now + } + lastProgressBar = bar + } + + /** + * Clear the progress bar if it is shown. + */ + private def clear() { + if (!lastProgressBar.isEmpty) { + System.err.printf(CR + " " * TerminalWidth + CR) + lastProgressBar = "" + } + } + + /** + * Mark all the stages as finished and clear the progress bar if it is shown, so that the + * progress output does not interleave with the jobs' output.
+ */ + def finishAll(): Unit = synchronized { + clear() + lastFinishTime = System.currentTimeMillis() + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 2a27d49d2de05..88fed833f922d 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -201,7 +201,7 @@ private[spark] object JettyUtils extends Logging { } } - val (server, boundPort) = Utils.startServiceOnPort[Server](port, connect, serverName) + val (server, boundPort) = Utils.startServiceOnPort[Server](port, connect, conf, serverName) ServerInfo(server, boundPort, collection) } diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index cccd59d122a92..0c24ad2760e08 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -21,60 +21,44 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener import org.apache.spark.ui.JettyUtils._ -import org.apache.spark.ui.env.EnvironmentTab -import org.apache.spark.ui.exec.ExecutorsTab -import org.apache.spark.ui.jobs.JobProgressTab -import org.apache.spark.ui.storage.StorageTab +import org.apache.spark.ui.env.{EnvironmentListener, EnvironmentTab} +import org.apache.spark.ui.exec.{ExecutorsListener, ExecutorsTab} +import org.apache.spark.ui.jobs.{JobsTab, JobProgressListener, StagesTab} +import org.apache.spark.ui.storage.{StorageListener, StorageTab} /** * Top level user interface for a Spark application. */ -private[spark] class SparkUI( - val sc: SparkContext, +private[spark] class SparkUI private ( + val sc: Option[SparkContext], val conf: SparkConf, val securityManager: SecurityManager, - val listenerBus: SparkListenerBus, + val environmentListener: EnvironmentListener, + val storageStatusListener: StorageStatusListener, + val executorsListener: ExecutorsListener, + val jobProgressListener: JobProgressListener, + val storageListener: StorageListener, var appName: String, - val basePath: String = "") + val basePath: String) extends WebUI(securityManager, SparkUI.getUIPort(conf), conf, basePath, "SparkUI") with Logging { - def this(sc: SparkContext) = this(sc, sc.conf, sc.env.securityManager, sc.listenerBus, sc.appName) - def this(conf: SparkConf, listenerBus: SparkListenerBus, appName: String, basePath: String) = - this(null, conf, new SecurityManager(conf), listenerBus, appName, basePath) - - def this( - conf: SparkConf, - securityManager: SecurityManager, - listenerBus: SparkListenerBus, - appName: String, - basePath: String) = - this(null, conf, securityManager, listenerBus, appName, basePath) - - // If SparkContext is not provided, assume the associated application is not live - val live = sc != null - - // Maintain executor storage status through Spark events - val storageStatusListener = new StorageStatusListener - - initialize() + val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false) /** Initialize all components of the server. 
*/ def initialize() { - listenerBus.addListener(storageStatusListener) - val jobProgressTab = new JobProgressTab(this) - attachTab(jobProgressTab) + attachTab(new JobsTab(this)) + val stagesTab = new StagesTab(this) + attachTab(stagesTab) attachTab(new StorageTab(this)) attachTab(new EnvironmentTab(this)) attachTab(new ExecutorsTab(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) - attachHandler(createRedirectHandler("/", "/stages", basePath = basePath)) + attachHandler(createRedirectHandler("/", "/jobs", basePath = basePath)) attachHandler( - createRedirectHandler("/stages/stage/kill", "/stages", jobProgressTab.handleKillRequest)) - if (live) { - sc.env.metricsSystem.getServletHandlers.foreach(attachHandler) - } + createRedirectHandler("/stages/stage/kill", "/stages", stagesTab.handleKillRequest)) } + initialize() def getAppName = appName @@ -83,11 +67,6 @@ private[spark] class SparkUI( appName = name } - /** Register the given listener with the listener bus. */ - def registerListener(listener: SparkListener) { - listenerBus.addListener(listener) - } - /** Stop the server behind this web interface. Only valid after bind(). */ override def stop() { super.stop() @@ -116,4 +95,60 @@ private[spark] object SparkUI { def getUIPort(conf: SparkConf): Int = { conf.getInt("spark.ui.port", SparkUI.DEFAULT_PORT) } + + def createLiveUI( + sc: SparkContext, + conf: SparkConf, + listenerBus: SparkListenerBus, + jobProgressListener: JobProgressListener, + securityManager: SecurityManager, + appName: String): SparkUI = { + create(Some(sc), conf, listenerBus, securityManager, appName, + jobProgressListener = Some(jobProgressListener)) + } + + def createHistoryUI( + conf: SparkConf, + listenerBus: SparkListenerBus, + securityManager: SecurityManager, + appName: String, + basePath: String): SparkUI = { + create(None, conf, listenerBus, securityManager, appName, basePath) + } + + /** + * Create a new Spark UI. + * + * @param sc optional SparkContext; this can be None when reconstituting a UI from event logs. + * @param jobProgressListener if supplied, this JobProgressListener will be used; otherwise, the + * web UI will create and register its own JobProgressListener. 
+ */ + private def create( + sc: Option[SparkContext], + conf: SparkConf, + listenerBus: SparkListenerBus, + securityManager: SecurityManager, + appName: String, + basePath: String = "", + jobProgressListener: Option[JobProgressListener] = None): SparkUI = { + + val _jobProgressListener: JobProgressListener = jobProgressListener.getOrElse { + val listener = new JobProgressListener(conf) + listenerBus.addListener(listener) + listener + } + + val environmentListener = new EnvironmentListener + val storageStatusListener = new StorageStatusListener + val executorsListener = new ExecutorsListener(storageStatusListener) + val storageListener = new StorageListener(storageStatusListener) + + listenerBus.addListener(environmentListener) + listenerBus.addListener(storageStatusListener) + listenerBus.addListener(executorsListener) + listenerBus.addListener(storageListener) + + new SparkUI(sc, conf, securityManager, environmentListener, storageStatusListener, + executorsListener, _jobProgressListener, storageListener, appName, basePath) + } } diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index 9ced9b8107ebf..4307029d44fbb 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -24,11 +24,30 @@ private[spark] object ToolTips { scheduler delay is large, consider decreasing the size of tasks or decreasing the size of task results.""" + val TASK_DESERIALIZATION_TIME = "Time spent deserializing the task closure on the executor." + + val SHUFFLE_READ_BLOCKED_TIME = + "Time that the task spent blocked waiting for shuffle data to be read from remote machines." + val INPUT = "Bytes read from Hadoop or from Spark storage." + val OUTPUT = "Bytes written to Hadoop." + val SHUFFLE_WRITE = "Bytes written to disk in order to be read by a shuffle in a future stage." val SHUFFLE_READ = """Bytes read from remote executors. Typically less than shuffle write bytes because this does not include shuffle data read locally.""" + + val GETTING_RESULT_TIME = + """Time that the driver spends fetching task results from workers. If this is large, consider + decreasing the amount of data returned from each task.""" + + val RESULT_SERIALIZATION_TIME = + """Time spent serializing the task result on the executor before sending it back to the + driver.""" + + val GC_TIME = + """Time that the executor spent paused for Java garbage collection while the task was + running.""" } diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 32e6b15bb0999..b5022fe853c49 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -20,13 +20,14 @@ package org.apache.spark.ui import java.text.SimpleDateFormat import java.util.{Locale, Date} -import scala.xml.Node +import scala.xml.{Node, Text} import org.apache.spark.Logging /** Utility functions for generating XML pages with spark content. */ private[spark] object UIUtils extends Logging { - val TABLE_CLASS = "table table-bordered table-striped table-condensed sortable" + val TABLE_CLASS_NOT_STRIPED = "table table-bordered table-condensed sortable" + val TABLE_CLASS_STRIPED = TABLE_CLASS_NOT_STRIPED + " table-striped" // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. 
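The hunk below only shows the declaration of the ThreadLocal; for reference, a complete version of the pattern the comment above is describing looks like this (the format string is illustrative):

import java.text.SimpleDateFormat
import java.util.Date

object DateFormatting {
  // One SimpleDateFormat per thread, because the class is not safe to share across threads.
  private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
    override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
  }

  def formatDate(timestamp: Long): String = dateFormat.get.format(new Date(timestamp))
}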
private val dateFormat = new ThreadLocal[SimpleDateFormat]() { @@ -160,6 +161,8 @@ private[spark] object UIUtils extends Logging { + + } /** Returns a spark page with correctly formatted headers */ @@ -167,15 +170,21 @@ private[spark] object UIUtils extends Logging { title: String, content: => Seq[Node], activeTab: SparkUITab, - refreshInterval: Option[Int] = None): Seq[Node] = { + refreshInterval: Option[Int] = None, + helpText: Option[String] = None): Seq[Node] = { val appName = activeTab.appName val shortAppName = if (appName.length < 36) appName else appName.take(32) + "..." val header = activeTab.headerTabs.map { tab =>
  • - {tab.name} + {tab.name}
  • } + val helpButton: Seq[Node] = helpText.map { helpText => + + (?) + + }.getOrElse(Seq.empty) @@ -185,9 +194,12 @@ private[spark] object UIUtils extends Logging {
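A hypothetical usage sketch (not part of the patch) of the helpText parameter added to UIUtils.headerSparkPage above; the page, its content, and the help string are invented for illustration.

package org.apache.spark.ui   // headerSparkPage and SparkUITab are private[spark]

import scala.xml.Node

object ExamplePage {
  def render(activeTab: SparkUITab): Seq[Node] = {
    val content: Seq[Node] = <div>Details for the selected job</div>
    // Passing helpText makes the header render a "(?)" tooltip next to the page title.
    UIUtils.headerSparkPage(
      "Job Details",
      content,
      activeTab,
      helpText = Some("Shows details for the selected job, including its stages."))
  }
}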